re-common 10.0.40__tar.gz → 10.0.42__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {re_common-10.0.40/re_common.egg-info → re_common-10.0.42}/PKG-INFO +2 -10
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/business_utils/BusinessStringUtil.py +17 -2
- re_common-10.0.42/re_common/v2/baselibrary/s3object/baseaioboto3.py +48 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/data_processer/base.py +2 -2
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/data_processer/data_processer.py +52 -63
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/data_processer/data_reader.py +9 -6
- re_common-10.0.42/re_common/v2/baselibrary/tools/dir_file_tools.py +27 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/list_tools.py +18 -1
- re_common-10.0.42/re_common/v2/baselibrary/tools/tree_processor/builder.py +25 -0
- re_common-10.0.42/re_common/v2/baselibrary/tools/tree_processor/node.py +13 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/api_net_utils.py +49 -21
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/basetime.py +17 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/db.py +19 -1
- re_common-10.0.42/re_common/v2/baselibrary/utils/pinyin_utils.py +178 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/string_bool.py +2 -1
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/stringutils.py +41 -0
- re_common-10.0.42/re_common/vip/proxy/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42/re_common.egg-info}/PKG-INFO +2 -10
- {re_common-10.0.40 → re_common-10.0.42}/re_common.egg-info/SOURCES.txt +6 -0
- {re_common-10.0.40 → re_common-10.0.42}/setup.py +1 -1
- {re_common-10.0.40 → re_common-10.0.42}/LICENSE +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/README.md +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/pyproject.toml +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/baseabs/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/baseabs/baseabs.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/database/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/database/mbuilder.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/database/moudle.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/database/msqlite3.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/database/mysql.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/database/sql_factory.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/mthread/MThreadingRun.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/mthread/MThreadingRunEvent.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/mthread/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/mthread/mythreading.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/pakge_other/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/pakge_other/socks.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/readconfig/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/readconfig/config_factory.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/readconfig/ini_config.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/readconfig/toml_config.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/temporary/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/temporary/envdata.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/all_requests/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/all_requests/aiohttp_request.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/all_requests/httpx_requet.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/all_requests/mrequest.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/all_requests/requests_request.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/batch_compre/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/batch_compre/bijiao_batch.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/contrast_db3.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/copy_file.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/db3_2_sizedb3.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/foreachgz.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/get_attr.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/image_to_pdf.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/java_code_deal.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/javacode.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/mdb_db3.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/merge_file.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/merge_gz_file.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/mhdfstools/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/mhdfstools/hdfst.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/mongo_tools.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/move_file.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/move_mongo/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/move_mongo/move_mongo_table.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/move_mongo/use_mttf.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/move_mongo/use_mv.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/mpandas/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/mpandas/pandas_visualization.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/myparsel.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/rename_dir_file.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/sequoiadb_utils.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/split_line_to_many.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/stringtodicts.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/workwechant_bot.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baseaiohttp.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baseaiomysql.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baseallstep.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baseavro.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baseboto3.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basecsv.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basedict.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basedir.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baseencode.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baseencoding.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baseesdsl.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baseexcel.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baseexcept.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basefile.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baseftp.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basegzip.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basehdfs.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basehttpx.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baseip.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basejson.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baselist.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basemotor.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basemssql.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baseodbc.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basepandas.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basepeewee.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basepika.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basepydash.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basepymongo.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basequeue.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baserar.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baserequest.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baseset.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basesmb.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basestring.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basetime.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basetuple.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baseurl.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basezip.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/core/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/core/bottomutils.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/core/mdeprecated.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/core/mlamada.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/core/msginfo.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/core/requests_core.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/fateadm.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/importfun.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/mfaker.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/my_abc/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/my_abc/better_abc.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/mylogger.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/myredisclient.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/pipupgrade.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/ringlist.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/version_compare.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/ydmhttp.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/facade/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/facade/lazy_import.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/facade/loggerfacade.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/facade/mysqlfacade.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/facade/now.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/facade/sqlite3facade.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/facade/use/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/facade/use/mq_use_facade.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/facade/use/proxy_use_facade.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/base_dict_test.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/baseavro_test.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/basefile_test.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/basemssql_test.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/baseodbc_test.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/basepandas_test.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/get_attr_test/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/get_attr_test/get_attr_test_settings.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/get_attr_test/settings.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/idencode_test.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/iniconfig_test.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/ip_test.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/merge_file_test.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/mfaker_test.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/mm3_test.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/mylogger_test.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/myparsel_test.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/mysql_test.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/pymongo_test.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/split_test.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/sqlite3_merge_test.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/sqlite3_test.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/tomlconfig_test.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/use_tools_test/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/user/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/studio/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/studio/assignment_expressions.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/studio/mydash/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/studio/mydash/test1.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/studio/pydashstudio/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/studio/pydashstudio/first.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/studio/streamlitstudio/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/studio/streamlitstudio/first_app.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/studio/streamlitstudio/uber_pickups.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/studio/test.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/business_utils/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/business_utils/baseencodeid.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/business_utils/full_doi_path.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/business_utils/rel_tools.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/decorators/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/decorators/utils.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/helpers/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/helpers/search_packge/NearestNeighbors_test.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/helpers/search_packge/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/helpers/search_packge/fit_text_match.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/helpers/search_packge/scikit_learn_text_matcher.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/helpers/search_packge/test.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/s3object/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/s3object/baseboto3.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/WeChatRobot.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/ac_ahocorasick.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/concurrency.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/data_processer/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/data_processer/data_writer.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/dict_tools.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/dolphinscheduler.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/hdfs_base_processor.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/hdfs_bulk_processor.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/hdfs_data_processer.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/hdfs_line_processor.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/resume_tracker.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/search_hash_tools.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/text_matcher.py +0 -0
- {re_common-10.0.40/re_common/v2/baselibrary/utils → re_common-10.0.42/re_common/v2/baselibrary/tools/tree_processor}/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/unionfind_tools.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/BusinessStringUtil.py +0 -0
- {re_common-10.0.40/re_common/vip → re_common-10.0.42/re_common/v2/baselibrary/utils}/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/author_smi.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/base_string_similarity.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/basedict.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/basehdfs.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/basepika.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/elasticsearch.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/json_cls.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/mq.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/n_ary_expression_tree.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/string_clear.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/string_smi.py +0 -0
- {re_common-10.0.40/re_common/vip/proxy → re_common-10.0.42/re_common/vip}/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/base_step_process.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/baseencodeid.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/changetaskname.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/core_var.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/mmh3Hash.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/proxy/allproxys.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/proxy/allproxys_thread.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/proxy/cnki_proxy.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/proxy/kuaidaili.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/proxy/proxy_all.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/proxy/update_kuaidaili_0.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/proxy/wanfang_proxy.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/proxy/wp_proxy_all.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/read_rawid_to_txt.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/title/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/title/transform/TransformBookTitleToZt.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/title/transform/TransformConferenceTitleToZt.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/title/transform/TransformCstadTitleToZt.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/title/transform/TransformJournalTitleToZt.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/title/transform/TransformPatentTitleToZt.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/title/transform/TransformRegulationTitleToZt.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/title/transform/TransformStandardTitleToZt.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/title/transform/TransformThesisTitleToZt.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/title/transform/__init__.py +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common.egg-info/dependency_links.txt +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/re_common.egg-info/top_level.txt +0 -0
- {re_common-10.0.40 → re_common-10.0.42}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
2
|
Name: re_common
|
|
3
|
-
Version: 10.0.40
|
|
3
|
+
Version: 10.0.42
|
|
4
4
|
Summary: a library about all python projects
|
|
5
5
|
Home-page: https://gitee.com/xujiangios/re-common
|
|
6
6
|
Author: vic
|
|
@@ -11,14 +11,6 @@ Classifier: Operating System :: OS Independent
|
|
|
11
11
|
Requires-Python: >=3.6
|
|
12
12
|
Description-Content-Type: text/markdown
|
|
13
13
|
License-File: LICENSE
|
|
14
|
-
Dynamic: author
|
|
15
|
-
Dynamic: author-email
|
|
16
|
-
Dynamic: classifier
|
|
17
|
-
Dynamic: description
|
|
18
|
-
Dynamic: description-content-type
|
|
19
|
-
Dynamic: home-page
|
|
20
|
-
Dynamic: requires-python
|
|
21
|
-
Dynamic: summary
|
|
22
14
|
|
|
23
15
|
|
|
24
16
|
这是一个基础类,依赖很多的第三方包,是一个用得到的第三方库的封装,可以在此基础上迅速构建项目
|
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
# 某些业务中的字符串处理 算是特定场景的工具 不算通用工具
|
|
2
|
+
import itertools
|
|
2
3
|
import re
|
|
3
4
|
|
|
5
|
+
from rapidfuzz.fuzz import partial_token_set_ratio
|
|
6
|
+
|
|
4
7
|
from re_common.v2.baselibrary.utils.author_smi import AuthorRatio
|
|
5
8
|
from re_common.v2.baselibrary.utils.string_bool import is_all_symbols
|
|
6
9
|
from re_common.v2.baselibrary.utils.string_clear import rel_clear
|
|
@@ -199,7 +202,7 @@ def deal_num(num_str):
|
|
|
199
202
|
return num_str.lower().strip()
|
|
200
203
|
|
|
201
204
|
|
|
202
|
-
def clear_author_1st(author_str:str):
|
|
205
|
+
def clear_author_1st(author_str: str):
|
|
203
206
|
# 清理括号 防止前面流程没有清理干净
|
|
204
207
|
author_str = re.sub("\\[.*?]", "", author_str)
|
|
205
208
|
author_str = re.sub("\\(.*?\\)", "", author_str)
|
|
@@ -209,6 +212,7 @@ def clear_author_1st(author_str:str):
|
|
|
209
212
|
|
|
210
213
|
return author_str
|
|
211
214
|
|
|
215
|
+
|
|
212
216
|
def is_same_author(a1, a2):
|
|
213
217
|
if get_alphabetic_ratio(a1.strip()) > 0.7 and get_alphabetic_ratio(a2.strip()) > 0.7:
|
|
214
218
|
author_similar_ = AuthorRatio(a1.strip(), a2.strip())
|
|
@@ -217,4 +221,15 @@ def is_same_author(a1, a2):
|
|
|
217
221
|
else:
|
|
218
222
|
if rel_clear(a1.strip()) == rel_clear(a2.strip()):
|
|
219
223
|
return True
|
|
220
|
-
return False
|
|
224
|
+
return False
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def abs_smi(abs_l1, abs_l2):
|
|
228
|
+
abs_l1 = [cleared for cleared in map(rel_clear, abs_l1) if cleared]
|
|
229
|
+
abs_l2 = [cleared for cleared in map(rel_clear, abs_l2) if cleared]
|
|
230
|
+
lists_max = []
|
|
231
|
+
for abs1, abs2 in list(itertools.product(abs_l1, abs_l2)):
|
|
232
|
+
max_smi = partial_token_set_ratio(abs1, abs2, processor=rel_clear)
|
|
233
|
+
lists_max.append(max_smi)
|
|
234
|
+
|
|
235
|
+
return max(lists_max)
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import aioboto3
|
|
2
|
+
import aiofiles
|
|
3
|
+
from aiobotocore.config import AioConfig
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# config = AioConfig(connect_timeout=600000, read_timeout=600000, retries={'max_attempts': 3},
|
|
7
|
+
# max_pool_connections=10)
|
|
8
|
+
|
|
9
|
+
class BaseAioBoto3(object):
|
|
10
|
+
|
|
11
|
+
def __init__(self, aws_access_key_id, aws_secret_access_key, endpoint_url,
|
|
12
|
+
config=AioConfig(max_pool_connections=10)):
|
|
13
|
+
self.aws_access_key_id = aws_access_key_id
|
|
14
|
+
self.aws_secret_access_key = aws_secret_access_key
|
|
15
|
+
self.endpoint_url = endpoint_url
|
|
16
|
+
self.config = config
|
|
17
|
+
self.boto_session = None
|
|
18
|
+
|
|
19
|
+
async def initialize_class_variable(self):
|
|
20
|
+
if self.boto_session is None:
|
|
21
|
+
self.boto_session = aioboto3.Session(
|
|
22
|
+
aws_access_key_id=self.aws_access_key_id,
|
|
23
|
+
aws_secret_access_key=self.aws_secret_access_key,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
async def read_minio_data(self, bucket, key):
|
|
27
|
+
await self.initialize_class_variable()
|
|
28
|
+
async with self.boto_session.client("s3", endpoint_url=self.endpoint_url, config=self.config) as s3:
|
|
29
|
+
s3_ob = await s3.get_object(Bucket=bucket, Key=key)
|
|
30
|
+
result = await s3_ob["Body"].read()
|
|
31
|
+
return result
|
|
32
|
+
|
|
33
|
+
# 异步下载大文件
|
|
34
|
+
async def download_file(self, bucket: str, key: str, local_path: str):
|
|
35
|
+
await self.initialize_class_variable()
|
|
36
|
+
async with self.boto_session.client("s3", endpoint_url=self.endpoint_url, config=self.config) as s3:
|
|
37
|
+
response = await s3.get_object(Bucket=bucket, Key=key)
|
|
38
|
+
body = response["Body"]
|
|
39
|
+
|
|
40
|
+
# 用异步方式写入本地
|
|
41
|
+
async with aiofiles.open(local_path, "wb") as f:
|
|
42
|
+
while True:
|
|
43
|
+
chunk = await body.read(10 * 1024 * 1024) # 每次读 10MB
|
|
44
|
+
if not chunk:
|
|
45
|
+
break
|
|
46
|
+
await f.write(chunk)
|
|
47
|
+
|
|
48
|
+
return local_path
|
{re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/data_processer/base.py
RENAMED
|
@@ -4,9 +4,9 @@ from typing import List, Generator
|
|
|
4
4
|
|
|
5
5
|
class BaseFileReader(ABC):
|
|
6
6
|
|
|
7
|
-
def __init__(self, batch_size: int = 10000):
|
|
7
|
+
def __init__(self, batch_size: int = 10000, read_model: int = 1):
|
|
8
8
|
self.batch_size = batch_size
|
|
9
|
-
self.read_model =
|
|
9
|
+
self.read_model = read_model
|
|
10
10
|
|
|
11
11
|
@abstractmethod
|
|
12
12
|
def list_files(self, path: str) -> List[str]:
|
|
@@ -43,6 +43,15 @@ class DatabaseHandler:
|
|
|
43
43
|
)
|
|
44
44
|
conn.commit()
|
|
45
45
|
|
|
46
|
+
def get_processed_files_count(self):
|
|
47
|
+
"""查看db3存储了多少成功的记录"""
|
|
48
|
+
with FileLock(self.lock_file):
|
|
49
|
+
with sqlite3.connect(self.db_file) as conn:
|
|
50
|
+
cursor = conn.cursor()
|
|
51
|
+
cursor.execute("SELECT COUNT(*) FROM processed_files")
|
|
52
|
+
count = cursor.fetchone()[0]
|
|
53
|
+
return count
|
|
54
|
+
|
|
46
55
|
def save_processed_files_many(self, file_paths):
|
|
47
56
|
"""批量保存处理过的文件路径"""
|
|
48
57
|
if not file_paths:
|
|
@@ -110,6 +119,21 @@ class DatabaseHandler:
|
|
|
110
119
|
print(f"伪造处理记录时出错: {str(e)}")
|
|
111
120
|
|
|
112
121
|
|
|
122
|
+
def on_retry(retry_state):
|
|
123
|
+
# 每次抛错进入该函数打印消息
|
|
124
|
+
exc = retry_state.outcome.exception()
|
|
125
|
+
tb = ''.join(traceback.format_exception(type(exc), exc, exc.__traceback__))
|
|
126
|
+
print(tb)
|
|
127
|
+
print(
|
|
128
|
+
f"处理文件 {retry_state.args[0]} 时发生错误: {exc},正在重试 {retry_state.attempt_number}")
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def on_retry_error(retry_state):
|
|
132
|
+
# 最后抛错后调用
|
|
133
|
+
print(f"处理文件 {retry_state.args[0]} 失败,达到重试上限")
|
|
134
|
+
return False
|
|
135
|
+
|
|
136
|
+
|
|
113
137
|
class DataProcessor:
|
|
114
138
|
def __init__(
|
|
115
139
|
self,
|
|
@@ -249,38 +273,6 @@ class DataProcessor:
|
|
|
249
273
|
for file_path in all_files:
|
|
250
274
|
yield file_path
|
|
251
275
|
|
|
252
|
-
@retry(stop=stop_after_attempt(3),
|
|
253
|
-
wait=wait_random(min=10, max=30),
|
|
254
|
-
retry=retry_if_result(lambda result: not result), # 如果返回值是 False(失败),则重试 最后会抛出一个默认错误tenacity.RetryError:
|
|
255
|
-
reraise=True)
|
|
256
|
-
async def _batch_process_file(self, hdfs_file_path: str, process_func: Callable[[str], Any],
|
|
257
|
-
write_dir: str = None):
|
|
258
|
-
"""批量更新所有 gz 文件"""
|
|
259
|
-
# all_succeed = True
|
|
260
|
-
# for hdfs_file_path in self.get_file_list(hdfs_dir):
|
|
261
|
-
# if self.db_handler.is_file_processed(hdfs_file_path):
|
|
262
|
-
# print(f"跳过已处理文件: {hdfs_file_path}")
|
|
263
|
-
# continue # 如果文件已处理,跳过
|
|
264
|
-
# succeed = await self.retry_process_file(hdfs_file_path, process_func, write_dir) # 处理文件
|
|
265
|
-
# if succeed is False:
|
|
266
|
-
# all_succeed = False
|
|
267
|
-
#
|
|
268
|
-
# if all_succeed:
|
|
269
|
-
# # 处理完成后删除数据库文件
|
|
270
|
-
# try:
|
|
271
|
-
# if os.path.exists(self.db_file):
|
|
272
|
-
# os.remove(self.db_file)
|
|
273
|
-
# print(f"已删除断点重试文件: {self.db_file}")
|
|
274
|
-
# return True
|
|
275
|
-
# except Exception as e:
|
|
276
|
-
# print(f"删除断点重试文件失败: {e}")
|
|
277
|
-
# return False
|
|
278
|
-
if self.db_handler.is_file_processed(hdfs_file_path):
|
|
279
|
-
print(f"跳过已处理文件: {hdfs_file_path}")
|
|
280
|
-
return True # 如果文件已处理,跳过
|
|
281
|
-
succeed = await self.retry_process_file(hdfs_file_path, process_func, write_dir) # 处理文件
|
|
282
|
-
return succeed
|
|
283
|
-
|
|
284
276
|
async def process_file_bulk(self, hdfs_file_path, process_func, write_dir):
|
|
285
277
|
"""按批次处理单个文件,批量数据传递给处理函数"""
|
|
286
278
|
# 获取文件的数据总量
|
|
@@ -391,36 +383,6 @@ class DataProcessor:
|
|
|
391
383
|
succeed = await self._batch_process_file(hdfs_file_path, process_func, write_dir)
|
|
392
384
|
if succeed is False:
|
|
393
385
|
all_succeed = False
|
|
394
|
-
if all_succeed:
|
|
395
|
-
# 处理完成后删除数据库文件
|
|
396
|
-
try:
|
|
397
|
-
if os.path.exists(self.db_file):
|
|
398
|
-
os.remove(self.db_file)
|
|
399
|
-
print(f"已删除断点重试文件: {self.db_file}")
|
|
400
|
-
return True
|
|
401
|
-
except Exception as e:
|
|
402
|
-
print(f"删除断点重试文件失败: {e}")
|
|
403
|
-
return False
|
|
404
|
-
|
|
405
|
-
@retry(stop=stop_after_attempt(3),
|
|
406
|
-
wait=wait_random(min=10, max=30),
|
|
407
|
-
retry=retry_if_result(lambda result: not result), # 如果返回值是 False(失败),则重试 最后会抛出一个默认错误tenacity.RetryError:
|
|
408
|
-
reraise=True)
|
|
409
|
-
async def _batch_process_file_bulk(self, hdfs_file_path: str, process_func: Callable[[List[str]], Any],
|
|
410
|
-
write_dir: str = None):
|
|
411
|
-
"""批量处理 gz 文件中的数据"""
|
|
412
|
-
# 获取所有文件
|
|
413
|
-
# all_succeed = True
|
|
414
|
-
# for hdfs_file_path in self.get_file_list(hdfs_dir):
|
|
415
|
-
# # 查看是否跳过文件
|
|
416
|
-
# if self.db_handler.is_file_processed(hdfs_file_path):
|
|
417
|
-
# print(f"跳过已处理文件: {hdfs_file_path}")
|
|
418
|
-
# continue # 跳过已处理文件
|
|
419
|
-
# # 开始批量处理文件
|
|
420
|
-
# succeed = await self.retry_process_file_bulk(hdfs_file_path, process_func, write_dir)
|
|
421
|
-
# if succeed is False:
|
|
422
|
-
# all_succeed = False
|
|
423
|
-
#
|
|
424
386
|
# if all_succeed:
|
|
425
387
|
# # 处理完成后删除数据库文件
|
|
426
388
|
# try:
|
|
@@ -430,7 +392,17 @@ class DataProcessor:
|
|
|
430
392
|
# return True
|
|
431
393
|
# except Exception as e:
|
|
432
394
|
# print(f"删除断点重试文件失败: {e}")
|
|
433
|
-
|
|
395
|
+
return all_succeed
|
|
396
|
+
|
|
397
|
+
@retry(stop=stop_after_attempt(3),
|
|
398
|
+
wait=wait_random(min=10, max=30),
|
|
399
|
+
# retry=retry_if_result(lambda result: not result), # 如果返回值是 False(失败),则重试 最后会抛出一个默认错误tenacity.RetryError:
|
|
400
|
+
before_sleep=on_retry, # 每次抛错后使用
|
|
401
|
+
retry_error_callback=on_retry_error, # 如果最后没成功 返回 False
|
|
402
|
+
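reraise=True) # NOTE: has no effect while retry_error_callback is set; after the final failed attempt the callback's return value (False) is returned instead of re-raising the original exception.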
|
|
403
|
+
async def _batch_process_file_bulk(self, hdfs_file_path: str, process_func: Callable[[List[str]], Any],
|
|
404
|
+
write_dir: str = None):
|
|
405
|
+
"""批量处理 gz 文件中的数据"""
|
|
434
406
|
# 查看是否跳过文件
|
|
435
407
|
if self.db_handler.is_file_processed(hdfs_file_path):
|
|
436
408
|
print(f"跳过已处理文件: {hdfs_file_path}")
|
|
@@ -439,6 +411,21 @@ class DataProcessor:
|
|
|
439
411
|
succeed = await self.retry_process_file_bulk(hdfs_file_path, process_func, write_dir)
|
|
440
412
|
return succeed
|
|
441
413
|
|
|
414
|
+
@retry(stop=stop_after_attempt(3),
       wait=wait_random(min=10, max=30),
       # Result-based retrying (retry_if_result on a falsy return value) is
       # deliberately left disabled here.
       before_sleep=on_retry,  # called after a failed attempt, before sleeping
       retry_error_callback=on_retry_error,  # called once attempts are exhausted; returns False
       reraise=True)
async def _batch_process_file(self, hdfs_file_path: str, process_func: Callable[[str], Any],
                              write_dir: str = None):
    """Process a single file unless it is already recorded as processed.

    Args:
        hdfs_file_path: path of the file to handle.
        process_func: callback forwarded to ``retry_process_file``.
        write_dir: optional output directory forwarded to ``retry_process_file``.

    Returns:
        ``True`` when the file was already processed, otherwise whatever
        ``retry_process_file`` returns.
    """
    already_done = self.db_handler.is_file_processed(hdfs_file_path)
    if already_done:
        # Nothing to do: the checkpoint store already lists this file.
        print(f"跳过已处理文件: {hdfs_file_path}")
        return True
    return await self.retry_process_file(hdfs_file_path, process_func, write_dir)
|
|
428
|
+
|
|
442
429
|
|
|
443
430
|
# 全局变量,每个进程独立持有
|
|
444
431
|
_processor: DataProcessor | None = None
|
|
@@ -506,3 +493,5 @@ def run_worker_many(hdfs_dir: str, process_func: Callable[[List[str]], Any] | Ca
|
|
|
506
493
|
for result in results:
|
|
507
494
|
if result:
|
|
508
495
|
print(result)
|
|
496
|
+
db3_count = processor.db_handler.get_processed_files_count()
|
|
497
|
+
print(f"db3文件数据量{db3_count},文件实际数据量{len(all_file)},是否完成全部转移: {db3_count == len(all_file)}")
|
{re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/data_processer/data_reader.py
RENAMED
|
@@ -12,8 +12,9 @@ from re_common.v2.baselibrary.tools.data_processer.base import BaseFileReader
|
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class HDFSFileReader(BaseFileReader):
|
|
15
|
-
def __init__(self, batch_size: int = 1000, hdfs_url: str = "http://VIP-DC-MASTER-2:9870",
|
|
16
|
-
|
|
15
|
+
def __init__(self, batch_size: int = 1000, read_model: int = 1, hdfs_url: str = "http://VIP-DC-MASTER-2:9870",
|
|
16
|
+
hdfs_user: str = "root"):
|
|
17
|
+
super().__init__(batch_size, read_model)
|
|
17
18
|
self.client = InsecureClient(hdfs_url, user=hdfs_user)
|
|
18
19
|
|
|
19
20
|
def list_files(self, path: str) -> List[str]:
|
|
@@ -48,8 +49,9 @@ class HDFSFileReader(BaseFileReader):
|
|
|
48
49
|
|
|
49
50
|
|
|
50
51
|
class HDFSGZFileReader(BaseFileReader):
|
|
51
|
-
def __init__(self, batch_size: int = 1000, hdfs_url: str = "http://VIP-DC-MASTER-2:9870",
|
|
52
|
-
|
|
52
|
+
def __init__(self, batch_size: int = 1000, read_model: int = 1, hdfs_url: str = "http://VIP-DC-MASTER-2:9870",
|
|
53
|
+
hdfs_user: str = "root"):
|
|
54
|
+
super().__init__(batch_size, read_model)
|
|
53
55
|
self.hdfs_url = hdfs_url
|
|
54
56
|
self.hdfs_user = hdfs_user
|
|
55
57
|
self.client = None
|
|
@@ -99,8 +101,9 @@ class HDFSGZFileReader(BaseFileReader):
|
|
|
99
101
|
|
|
100
102
|
|
|
101
103
|
class HDFSParquetFileReader(BaseFileReader):
|
|
102
|
-
def __init__(self, batch_size: int = 1000, hdfs_url: str = "http://VIP-DC-MASTER-2:9870",
|
|
103
|
-
|
|
104
|
+
def __init__(self, batch_size: int = 1000, read_model: int = 1, hdfs_url: str = "http://VIP-DC-MASTER-2:9870",
|
|
105
|
+
hdfs_user: str = "root"):
|
|
106
|
+
super().__init__(batch_size, read_model)
|
|
104
107
|
self.client = InsecureClient(hdfs_url, user=hdfs_user)
|
|
105
108
|
|
|
106
109
|
def list_files(self, path: str) -> List[str]:
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def scan_dir_fast(path):
    """List the files directly inside *path* together with their sizes.

    The scan is not recursive; entries for which ``is_file()`` is false
    (for example sub-directories) are left out.

    Args:
        path: directory to scan.

    Returns:
        list[dict]: one ``{"path": ..., "size": ...}`` record per file.
    """
    with os.scandir(path) as entries:
        return [
            {"path": entry.path, "size": entry.stat().st_size}
            for entry in entries
            if entry.is_file()
        ]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def scan_dir(dir_name, result_file):
    """Walk *dir_name* recursively and append one JSON line per file to *result_file*.

    Each visited directory is printed, scanned with ``scan_dir_fast`` and its
    records are appended immediately, so the result file grows directory by
    directory.

    Args:
        dir_name: root directory to walk, e.g. r"/share/fulltext/errors".
        result_file: output path opened in append mode, e.g. "file_info_errors.txt".
    """
    for current_dir, _subdirs, _filenames in os.walk(dir_name):
        print(current_dir)
        records = scan_dir_fast(current_dir)
        with open(result_file, "a", encoding="utf-8") as sink:
            sink.writelines(
                json.dumps(record, ensure_ascii=False) + "\n"
                for record in records
                if record
            )
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import itertools
|
|
2
|
+
from collections import Counter
|
|
2
3
|
from typing import List, Any, Tuple
|
|
3
4
|
|
|
4
5
|
|
|
@@ -67,4 +68,20 @@ def list_to_dict(list_data,key_name):
|
|
|
67
68
|
|
|
68
69
|
def split_list_by_step(lst, step=100):
    """Cut a flat sequence into consecutive chunks of at most *step* items.

    Args:
        lst: sliceable sequence to split.
        step: chunk length; the final chunk may be shorter.

    Returns:
        list: the chunks, in their original order.
    """
    chunks = []
    for start in range(0, len(lst), step):
        chunks.append(lst[start:start + step])
    return chunks
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def list_diff(l1, l2):
    """Compare two lists as multisets, keeping duplicates.

    ``Counter`` defines ``&`` as the element-wise minimum of counts and ``-``
    as a subtraction that drops non-positive counts, which yields
    duplicate-aware intersection and difference.

    Args:
        l1: first list of hashable items.
        l2: second list of hashable items.

    Returns:
        tuple: ``(common, extra1, extra2)`` where ``common`` holds the items
        shared by both lists, ``extra1`` the surplus of ``l1`` and ``extra2``
        the surplus of ``l2``.
    """
    left = Counter(l1)
    right = Counter(l2)

    shared = left & right
    only_left = left - right
    only_right = right - left

    return (
        list(shared.elements()),
        list(only_left.elements()),
        list(only_right.elements()),
    )
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from re_common.v2.baselibrary.tools.tree_processor.node import TreeNode
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def build_forest(node_list):
    """Build a forest of ``TreeNode`` objects from ``(cid, pid, count)`` rows.

    Args:
        node_list: iterable of ``(cid, pid, count)`` triples. ``pid`` is the
            parent id, or ``None`` for a row without a parent.

    Returns:
        list: every node that ended up without a parent, i.e. rows whose
        ``pid`` is ``None`` or does not match any ``cid`` in the input.
    """
    # The rows are traversed twice; materialise them so that a one-shot
    # iterable (e.g. a generator) is not exhausted before the linking pass.
    rows = list(node_list)

    # Pass 1: create a node per cid (a repeated cid keeps the last row).
    nodes = {cid: TreeNode(cid, count) for cid, _pid, count in rows}

    # Pass 2: attach each node to its parent when that parent exists.
    for cid, pid, _count in rows:
        if pid is not None and pid in nodes:
            nodes[pid].add_child(nodes[cid])

    # Pass 3: whatever still has no parent is the root of its own tree.
    return [node for node in nodes.values() if node.parent is None]
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
class TreeNode:
    """A node of a tree that carries an id, a count and parent/child links."""

    def __init__(self, cid, count):
        """Create a detached node: no parent and no children yet."""
        self.id, self.count = cid, count
        self.children, self.parent = [], None

    def add_child(self, child):
        """Append *child* to this node and point its ``parent`` back here."""
        kids = self.children
        kids.append(child)
        child.parent = self

    def is_leaf(self):
        """Return True when the node has no children."""
        return not self.children
|
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
import atexit
|
|
2
|
+
import os
|
|
2
3
|
import sys
|
|
3
4
|
import asyncio
|
|
5
|
+
import traceback
|
|
6
|
+
|
|
4
7
|
import aiohttp
|
|
5
|
-
from typing import Optional
|
|
8
|
+
from typing import Optional, Union
|
|
6
9
|
|
|
7
10
|
from tenacity import retry, stop_after_attempt, wait_random
|
|
8
11
|
|
|
@@ -62,6 +65,13 @@ def on_retry_error(retry_state):
|
|
|
62
65
|
|
|
63
66
|
def on_retry(retry_state):
|
|
64
67
|
# 每次抛错进入该函数打印消息
|
|
68
|
+
|
|
69
|
+
# # 获取函数调用参数
|
|
70
|
+
# args = retry_state.args
|
|
71
|
+
# kwargs = retry_state.kwargs
|
|
72
|
+
#
|
|
73
|
+
# print(id(args[0]._get_session()))
|
|
74
|
+
|
|
65
75
|
print(
|
|
66
76
|
f"[HTTP 请求重试]"
|
|
67
77
|
f"当前重试 : 第 {retry_state.attempt_number} 次"
|
|
@@ -84,6 +94,8 @@ class ApiNetUtils:
|
|
|
84
94
|
_conn: Optional[aiohttp.TCPConnector] = None
|
|
85
95
|
_session: Optional[aiohttp.ClientSession] = None
|
|
86
96
|
_close_registered: bool = False # 确保清理函数只注册一次
|
|
97
|
+
_pid: Optional[int] = None # 当前进程的 PID
|
|
98
|
+
lock = asyncio.Lock()
|
|
87
99
|
|
|
88
100
|
@classmethod
|
|
89
101
|
async def _get_connector(cls) -> aiohttp.TCPConnector:
|
|
@@ -96,9 +108,9 @@ class ApiNetUtils:
|
|
|
96
108
|
cls._conn = aiohttp.TCPConnector(
|
|
97
109
|
limit=50, # 最大连接数
|
|
98
110
|
ssl=False, # 禁用SSL验证(按需开启)
|
|
99
|
-
force_close=
|
|
100
|
-
|
|
101
|
-
|
|
111
|
+
force_close=False, # 保持连接活跃
|
|
112
|
+
enable_cleanup_closed=True, # 自动清理关闭的连接 #
|
|
113
|
+
keepalive_timeout=4.99 # 比服务器的5s 小一点
|
|
102
114
|
)
|
|
103
115
|
return cls._conn
|
|
104
116
|
|
|
@@ -108,25 +120,41 @@ class ApiNetUtils:
|
|
|
108
120
|
获取共享会话(线程安全的延迟初始化)
|
|
109
121
|
包含自动注册清理机制
|
|
110
122
|
"""
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
123
|
+
async with cls.lock:
|
|
124
|
+
current_pid = os.getpid()
|
|
125
|
+
if cls._pid != current_pid:
|
|
126
|
+
# 新进程,重新初始化
|
|
127
|
+
if cls._session:
|
|
128
|
+
await cls.close()
|
|
129
|
+
cls._pid = current_pid
|
|
130
|
+
|
|
131
|
+
if cls._session is None or cls._session.closed or cls.is_loop_closed(cls._session):
|
|
132
|
+
if cls._session:
|
|
133
|
+
await cls.close()
|
|
134
|
+
# 获取连接器(会自动初始化)
|
|
135
|
+
connector = await cls._get_connector()
|
|
136
|
+
|
|
137
|
+
# 强制获取新的事件循环
|
|
138
|
+
loop = asyncio.get_event_loop()
|
|
139
|
+
|
|
140
|
+
timeout = aiohttp.ClientTimeout(
|
|
141
|
+
total=120, # at most 120 seconds for the whole request
|
|
142
|
+
connect=10, # at most 10 seconds to obtain a connection
|
|
143
|
+
sock_connect=10,
|
|
144
|
+
sock_read=110, # at most 110 seconds waiting for a chunk of response data
|
|
145
|
+
)
|
|
119
146
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
147
|
+
# 创建新会话
|
|
148
|
+
cls._session = aiohttp.ClientSession(
|
|
149
|
+
connector=connector,
|
|
150
|
+
timeout=timeout, # session-wide default timeouts defined above (120 seconds total)
|
|
151
|
+
loop=loop,
|
|
152
|
+
) # 显式指定事件循环
|
|
125
153
|
|
|
126
|
-
|
|
127
|
-
|
|
154
|
+
# # 注册退出时的清理钩子
|
|
155
|
+
cls._register_cleanup()
|
|
128
156
|
|
|
129
|
-
|
|
157
|
+
return cls._session
|
|
130
158
|
|
|
131
159
|
@staticmethod
|
|
132
160
|
def is_loop_closed(session: aiohttp.ClientSession) -> bool:
|
|
@@ -135,7 +163,7 @@ class ApiNetUtils:
|
|
|
135
163
|
"""
|
|
136
164
|
loop = session._loop # 获取会话绑定的事件循环
|
|
137
165
|
if loop.is_closed():
|
|
138
|
-
|
|
166
|
+
print("Event loop is closed")
|
|
139
167
|
return True
|
|
140
168
|
# print("Event loop not is closed")
|
|
141
169
|
return False
|
|
@@ -75,3 +75,20 @@ class BaseTime(object):
|
|
|
75
75
|
current_time = BaseTime.get_current_beijing_time()
|
|
76
76
|
last_time = BaseTime.parse_beijing_time(last_time_str)
|
|
77
77
|
return current_time.hour != last_time.hour
|
|
78
|
+
|
|
79
|
+
@staticmethod
|
|
80
|
+
def is_weekday(num_weekday: int) -> bool:
|
|
81
|
+
"""
|
|
82
|
+
判断当前日期是否为指定星期。
|
|
83
|
+
|
|
84
|
+
参数:
|
|
85
|
+
num_weekday (int): 表示星期的数字(1=星期一, 2=星期二, ..., 7=星期日)。
|
|
86
|
+
|
|
87
|
+
返回:
|
|
88
|
+
bool: 如果当前日期不是指定的星期,则返回 True;否则返回 False。
|
|
89
|
+
|
|
90
|
+
示例:
|
|
91
|
+
如果 num_weekday=6(星期六),而今天是星期五(weekday()=4),则返回 True。
|
|
92
|
+
"""
|
|
93
|
+
current_weekday = datetime.now().weekday() # 获取当前星期(0=星期一, 1=星期二, ..., 6=星期日)
|
|
94
|
+
return current_weekday != num_weekday - 1
|
|
@@ -94,7 +94,6 @@ aiomysql_pool = None
|
|
|
94
94
|
pool_lock = asyncio.Lock() # 全局异步锁
|
|
95
95
|
|
|
96
96
|
|
|
97
|
-
|
|
98
97
|
async def init_aiomysql_pool_async():
|
|
99
98
|
global aiomysql_pool
|
|
100
99
|
if aiomysql_pool is None:
|
|
@@ -110,6 +109,7 @@ client = None
|
|
|
110
109
|
motor_fs_lock = asyncio.Lock() # 全局异步锁
|
|
111
110
|
_loop_id_mongo = None
|
|
112
111
|
|
|
112
|
+
|
|
113
113
|
async def check_connection(client):
|
|
114
114
|
try:
|
|
115
115
|
print("check mongodb client ping")
|
|
@@ -142,6 +142,7 @@ async def init_motor_async(uri, db_name, bucket_name, is_reload=False):
|
|
|
142
142
|
_loop_id_mongo = id(asyncio.get_running_loop())
|
|
143
143
|
return motor_fs, client
|
|
144
144
|
|
|
145
|
+
|
|
145
146
|
# async def run_main():
|
|
146
147
|
# while True:
|
|
147
148
|
# uri = "mongodb://192.168.98.80:27001/wpdc"
|
|
@@ -154,3 +155,20 @@ async def init_motor_async(uri, db_name, bucket_name, is_reload=False):
|
|
|
154
155
|
#
|
|
155
156
|
# if __name__ == "__main__":
|
|
156
157
|
# asyncio.run(run_main())
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def get_connection(autocommit: bool = True, **overrides) -> "Connection":
    """Open a new pymysql connection to the ``dataware_house_base`` database.

    Args:
        autocommit: passed to ``pymysql.connect`` as the ``autocommit`` option.
        **overrides: optional ``pymysql.connect`` keyword arguments (for example
            ``host``, ``user``, ``password``) that replace the built-in
            defaults below.

    Returns:
        Connection: a pymysql connection whose default cursor is ``DictCursor``.

    Raises:
        Whatever ``pymysql.connect`` raises when the connection cannot be made.
    """
    # pymysql is imported lazily, so the return annotation is a string: a bare
    # ``Connection`` would be looked up when the function is defined, before
    # these imports have run.
    import pymysql
    from pymysql import Connection
    from pymysql.cursors import DictCursor

    # NOTE(review): host and credentials are hard-coded in library code. They
    # are kept as defaults for backward compatibility; prefer passing them via
    # ``overrides`` and consider moving them to configuration.
    db_conf = {
        "host": "192.168.98.55",
        "port": 4000,
        "user": "dataware_house_baseUser",
        "password": "FF19AF831AEBD580B450B16BF9264200",
        "database": "dataware_house_base",
        "autocommit": autocommit,
        "cursorclass": DictCursor,
    }
    db_conf.update(overrides)
    conn: Connection = pymysql.connect(**db_conf)
    return conn
|