re-common 10.0.16__tar.gz → 10.0.17__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {re_common-10.0.16 → re_common-10.0.17}/PKG-INFO +1 -1
- {re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/tools/hdfs_data_processer.py +40 -19
- {re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/tools/list_tools.py +14 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/utils/BusinessStringUtil.py +8 -1
- re_common-10.0.17/re_common/v2/baselibrary/utils/base_string_similarity.py +158 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/utils/string_clear.py +10 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common.egg-info/PKG-INFO +1 -1
- {re_common-10.0.16 → re_common-10.0.17}/re_common.egg-info/SOURCES.txt +1 -0
- {re_common-10.0.16 → re_common-10.0.17}/setup.py +1 -1
- {re_common-10.0.16 → re_common-10.0.17}/LICENSE +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/README.md +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/baseabs/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/baseabs/baseabs.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/database/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/database/mbuilder.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/database/moudle.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/database/msqlite3.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/database/mysql.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/database/sql_factory.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/mthread/MThreadingRun.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/mthread/MThreadingRunEvent.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/mthread/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/mthread/mythreading.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/pakge_other/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/pakge_other/socks.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/readconfig/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/readconfig/config_factory.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/readconfig/ini_config.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/readconfig/toml_config.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/temporary/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/temporary/envdata.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/all_requests/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/all_requests/aiohttp_request.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/all_requests/httpx_requet.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/all_requests/mrequest.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/all_requests/requests_request.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/batch_compre/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/batch_compre/bijiao_batch.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/contrast_db3.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/copy_file.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/db3_2_sizedb3.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/foreachgz.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/get_attr.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/image_to_pdf.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/java_code_deal.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/javacode.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/mdb_db3.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/merge_file.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/merge_gz_file.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/mhdfstools/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/mhdfstools/hdfst.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/mongo_tools.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/move_file.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/move_mongo/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/move_mongo/move_mongo_table.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/move_mongo/use_mttf.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/move_mongo/use_mv.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/mpandas/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/mpandas/pandas_visualization.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/myparsel.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/rename_dir_file.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/sequoiadb_utils.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/split_line_to_many.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/stringtodicts.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/workwechant_bot.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/baseaiohttp.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/baseaiomysql.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/baseallstep.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/baseavro.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/baseboto3.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/basecsv.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/basedict.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/basedir.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/baseencode.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/baseencoding.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/baseesdsl.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/baseexcel.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/baseexcept.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/basefile.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/baseftp.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/basegzip.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/basehdfs.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/basehttpx.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/baseip.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/basejson.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/baselist.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/basemotor.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/basemssql.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/baseodbc.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/basepandas.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/basepeewee.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/basepika.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/basepydash.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/basepymongo.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/basequeue.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/baserar.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/baserequest.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/baseset.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/basesmb.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/basestring.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/basetime.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/basetuple.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/baseurl.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/basezip.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/core/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/core/bottomutils.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/core/mdeprecated.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/core/mlamada.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/core/msginfo.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/core/requests_core.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/fateadm.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/importfun.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/mfaker.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/my_abc/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/my_abc/better_abc.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/mylogger.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/myredisclient.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/pipupgrade.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/ringlist.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/version_compare.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/utils/ydmhttp.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/facade/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/facade/lazy_import.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/facade/loggerfacade.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/facade/mysqlfacade.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/facade/now.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/facade/sqlite3facade.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/facade/use/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/facade/use/mq_use_facade.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/facade/use/proxy_use_facade.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/libtest/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/libtest/base_dict_test.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/libtest/baseavro_test.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/libtest/basefile_test.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/libtest/basemssql_test.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/libtest/baseodbc_test.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/libtest/basepandas_test.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/libtest/get_attr_test/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/libtest/get_attr_test/get_attr_test_settings.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/libtest/get_attr_test/settings.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/libtest/idencode_test.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/libtest/iniconfig_test.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/libtest/ip_test.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/libtest/merge_file_test.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/libtest/mfaker_test.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/libtest/mm3_test.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/libtest/mylogger_test.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/libtest/myparsel_test.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/libtest/mysql_test.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/libtest/pymongo_test.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/libtest/split_test.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/libtest/sqlite3_merge_test.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/libtest/sqlite3_test.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/libtest/tomlconfig_test.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/libtest/use_tools_test/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/libtest/user/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/studio/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/studio/assignment_expressions.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/studio/mydash/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/studio/mydash/test1.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/studio/pydashstudio/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/studio/pydashstudio/first.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/studio/streamlitstudio/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/studio/streamlitstudio/first_app.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/studio/streamlitstudio/uber_pickups.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/studio/test.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/v2/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/decorators/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/decorators/utils.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/helpers/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/s3object/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/s3object/baseboto3.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/tools/WeChatRobot.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/tools/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/tools/ac_ahocorasick.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/tools/dict_tools.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/tools/dolphinscheduler.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/tools/search_hash_tools.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/tools/text_matcher.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/tools/unionfind_tools.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/utils/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/utils/author_smi.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/utils/basedict.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/utils/basehdfs.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/utils/basepika.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/utils/db.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/utils/json_cls.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/utils/mq.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/utils/n_ary_expression_tree.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/utils/string_bool.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/utils/string_smi.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/utils/stringutils.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/vip/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/vip/base_step_process.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/vip/baseencodeid.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/vip/changetaskname.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/vip/core_var.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/vip/mmh3Hash.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/vip/proxy/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/vip/proxy/allproxys.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/vip/proxy/allproxys_thread.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/vip/proxy/cnki_proxy.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/vip/proxy/kuaidaili.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/vip/proxy/proxy_all.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/vip/proxy/update_kuaidaili_0.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/vip/proxy/wanfang_proxy.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/vip/proxy/wp_proxy_all.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/vip/read_rawid_to_txt.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/vip/title/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/vip/title/transform/TransformBookTitleToZt.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/vip/title/transform/TransformConferenceTitleToZt.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/vip/title/transform/TransformCstadTitleToZt.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/vip/title/transform/TransformJournalTitleToZt.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/vip/title/transform/TransformPatentTitleToZt.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/vip/title/transform/TransformRegulationTitleToZt.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/vip/title/transform/TransformStandardTitleToZt.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/vip/title/transform/TransformThesisTitleToZt.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common/vip/title/transform/__init__.py +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common.egg-info/dependency_links.txt +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/re_common.egg-info/top_level.txt +0 -0
- {re_common-10.0.16 → re_common-10.0.17}/setup.cfg +0 -0
{re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/tools/hdfs_data_processer.py
RENAMED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import gzip
|
|
3
3
|
import json
|
|
4
|
+
from pathlib import Path
|
|
4
5
|
import sqlite3
|
|
5
6
|
import time
|
|
6
7
|
import os
|
|
@@ -12,12 +13,12 @@ from hdfs import InsecureClient
|
|
|
12
13
|
|
|
13
14
|
class HDFSDataProcessor:
|
|
14
15
|
def __init__(
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
16
|
+
self,
|
|
17
|
+
hdfs_url="http://VIP-DC-MASTER-2:9870",
|
|
18
|
+
hdfs_user="root",
|
|
19
|
+
db_file="processed_files.db",
|
|
20
|
+
batch_size=50,
|
|
21
|
+
retry_limit=3,
|
|
21
22
|
):
|
|
22
23
|
self.hdfs_url = hdfs_url
|
|
23
24
|
self.hdfs_user = hdfs_user
|
|
@@ -44,14 +45,20 @@ class HDFSDataProcessor:
|
|
|
44
45
|
"""保存处理过的文件"""
|
|
45
46
|
with sqlite3.connect(self.db_file) as conn:
|
|
46
47
|
cursor = conn.cursor()
|
|
47
|
-
cursor.execute(
|
|
48
|
+
cursor.execute(
|
|
49
|
+
"INSERT OR IGNORE INTO processed_files (file_path) VALUES (?)",
|
|
50
|
+
(file_path,),
|
|
51
|
+
)
|
|
48
52
|
conn.commit()
|
|
49
53
|
|
|
50
54
|
def is_file_processed(self, file_path):
|
|
51
55
|
"""检查文件是否已处理"""
|
|
52
56
|
with sqlite3.connect(self.db_file) as conn:
|
|
53
57
|
cursor = conn.cursor()
|
|
54
|
-
cursor.execute(
|
|
58
|
+
cursor.execute(
|
|
59
|
+
"SELECT file_path FROM processed_files WHERE file_path = ?",
|
|
60
|
+
(file_path,),
|
|
61
|
+
)
|
|
55
62
|
result = cursor.fetchone()
|
|
56
63
|
return result is not None
|
|
57
64
|
|
|
@@ -81,7 +88,7 @@ class HDFSDataProcessor:
|
|
|
81
88
|
break
|
|
82
89
|
yield lines
|
|
83
90
|
|
|
84
|
-
def all_read_gz(self, gz_file_path: str, encoding=
|
|
91
|
+
def all_read_gz(self, gz_file_path: str, encoding="utf-8"):
|
|
85
92
|
"""
|
|
86
93
|
读取 HDFS 上的 .gz 文件内容。
|
|
87
94
|
:param hdfs_path: HDFS 文件路径(必须以 .gz 结尾)
|
|
@@ -94,7 +101,7 @@ class HDFSDataProcessor:
|
|
|
94
101
|
content = gz_file.read().decode(encoding) # 解码为字符串
|
|
95
102
|
print(f"文件读取成功: {gz_file_path}")
|
|
96
103
|
lines = [i for i in content.split("\n") if i.strip()]
|
|
97
|
-
result = [lines[i:i + self.batch_size] for i in range(0, len(lines), self.batch_size)]
|
|
104
|
+
result = [lines[i : i + self.batch_size] for i in range(0, len(lines), self.batch_size)]
|
|
98
105
|
return result
|
|
99
106
|
|
|
100
107
|
async def process_data(self, data, process_func):
|
|
@@ -107,14 +114,15 @@ class HDFSDataProcessor:
|
|
|
107
114
|
except Exception as e:
|
|
108
115
|
retry_count += 1
|
|
109
116
|
print(f"处理数据时发生错误: {e}, 正在重试 {retry_count}/{self.retry_limit}, data: {data}")
|
|
110
|
-
await asyncio.sleep(2
|
|
117
|
+
await asyncio.sleep(2**retry_count)
|
|
111
118
|
print(f"处理数据失败, 达到重试上限, data: {data}")
|
|
112
119
|
|
|
113
|
-
async def process_file(self, hdfs_file_path, process_func):
|
|
120
|
+
async def process_file(self, hdfs_file_path, process_func, write_dir: str):
|
|
114
121
|
"""处理单个 gz 文件"""
|
|
115
122
|
total_lines = self.count_total_lines(hdfs_file_path)
|
|
116
123
|
processed_lines = 0
|
|
117
124
|
start_time = time.time()
|
|
125
|
+
results = []
|
|
118
126
|
# # 这里根据不同的配置选用不同的读取文件的方法
|
|
119
127
|
for lines in self.read_hdfs_fanc[self.read_hdfs_model](hdfs_file_path):
|
|
120
128
|
processing_start_time = time.time() # 记录本批处理开始时间
|
|
@@ -128,7 +136,7 @@ class HDFSDataProcessor:
|
|
|
128
136
|
print(f"解析JSON失败: {e}, 行内容: {line.strip()}")
|
|
129
137
|
|
|
130
138
|
# await AsyncTaskPool(self.batch_size).run(tasks) # AsyncTaskPool 适用于一次提交所有任务, 限制并发数执行
|
|
131
|
-
await asyncio.gather(*tasks)
|
|
139
|
+
results.extend(await asyncio.gather(*tasks))
|
|
132
140
|
|
|
133
141
|
processed_lines += len(lines)
|
|
134
142
|
|
|
@@ -152,6 +160,18 @@ class HDFSDataProcessor:
|
|
|
152
160
|
f"预估剩余时间: {remaining_time:.2f}秒 | 平均每条处理时间: {avg_processing_time:.2f}毫秒"
|
|
153
161
|
)
|
|
154
162
|
|
|
163
|
+
def generate_write_data(results):
|
|
164
|
+
for res in results:
|
|
165
|
+
yield str(res) + "\n"
|
|
166
|
+
|
|
167
|
+
if write_dir is not None:
|
|
168
|
+
self.client.write(
|
|
169
|
+
write_dir.rstrip("/") + f"/{Path(hdfs_file_path).stem}",
|
|
170
|
+
data=generate_write_data(results),
|
|
171
|
+
overwrite=True,
|
|
172
|
+
encoding="utf-8",
|
|
173
|
+
)
|
|
174
|
+
|
|
155
175
|
# 最终进度显示
|
|
156
176
|
final_elapsed_time = time.time() - start_time # 最终已用时间
|
|
157
177
|
print(
|
|
@@ -164,22 +184,22 @@ class HDFSDataProcessor:
|
|
|
164
184
|
|
|
165
185
|
self.save_processed_file(hdfs_file_path) # 保存处理过的文件
|
|
166
186
|
|
|
167
|
-
async def retry_process_file(self, hdfs_file_path, process_func):
|
|
187
|
+
async def retry_process_file(self, hdfs_file_path, process_func, write_dir):
|
|
168
188
|
"""带重试机制的文件处理"""
|
|
169
189
|
retry_count = 0
|
|
170
190
|
while retry_count < self.retry_limit:
|
|
171
191
|
try:
|
|
172
|
-
await self.process_file(hdfs_file_path, process_func)
|
|
192
|
+
await self.process_file(hdfs_file_path, process_func, write_dir)
|
|
173
193
|
return True # 成功处理后退出
|
|
174
194
|
except Exception as e:
|
|
175
195
|
retry_count += 1
|
|
176
196
|
print(f"处理文件 {hdfs_file_path} 时发生错误: {e},正在重试 {retry_count}/{self.retry_limit}")
|
|
177
|
-
await asyncio.sleep(2
|
|
197
|
+
await asyncio.sleep(2**retry_count)
|
|
178
198
|
print(f"处理文件 {hdfs_file_path} 失败,达到重试上限")
|
|
179
199
|
return False
|
|
180
200
|
# raise
|
|
181
201
|
|
|
182
|
-
async def batch_process_file(self, hdfs_dir: str, process_func: Callable[[dict], Any]):
|
|
202
|
+
async def batch_process_file(self, hdfs_dir: str, process_func: Callable[[dict], Any], write_dir: str = None):
|
|
183
203
|
"""批量更新所有 gz 文件"""
|
|
184
204
|
gz_files = self.list_gz_files(hdfs_dir)
|
|
185
205
|
all_succeed = True
|
|
@@ -187,7 +207,7 @@ class HDFSDataProcessor:
|
|
|
187
207
|
if self.is_file_processed(hdfs_file_path):
|
|
188
208
|
print(f"跳过已处理文件: {hdfs_file_path}")
|
|
189
209
|
continue # 如果文件已处理,跳过
|
|
190
|
-
succeed = await self.retry_process_file(hdfs_file_path, process_func) # 处理文件
|
|
210
|
+
succeed = await self.retry_process_file(hdfs_file_path, process_func, write_dir) # 处理文件
|
|
191
211
|
if succeed is False:
|
|
192
212
|
all_succeed = False
|
|
193
213
|
|
|
@@ -276,7 +296,7 @@ class HDFSDataProcessor:
|
|
|
276
296
|
except Exception as e:
|
|
277
297
|
retry_count += 1
|
|
278
298
|
print(f"处理文件 {hdfs_file_path} 时发生错误: {e},正在重试 {retry_count}/{self.retry_limit}")
|
|
279
|
-
await asyncio.sleep(2
|
|
299
|
+
await asyncio.sleep(2**retry_count)
|
|
280
300
|
print(f"处理文件 {hdfs_file_path} 失败,达到重试上限")
|
|
281
301
|
return False
|
|
282
302
|
|
|
@@ -301,6 +321,7 @@ class HDFSDataProcessor:
|
|
|
301
321
|
except Exception as e:
|
|
302
322
|
print(f"删除断点重试文件失败: {e}")
|
|
303
323
|
|
|
324
|
+
|
|
304
325
|
# # 使用示例
|
|
305
326
|
# async def update_refer(data: dict):
|
|
306
327
|
# ref_id = data["ref_id"]
|
|
@@ -50,3 +50,17 @@ def filter_and_sort_by_smi(all_list, top_n=1000):
|
|
|
50
50
|
|
|
51
51
|
# 3. 取前 top_n 个
|
|
52
52
|
return unique_list[:top_n]
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def list_to_dict(list_data,key_name):
|
|
56
|
+
# 使用 defaultdict 来处理重复 id
|
|
57
|
+
from collections import defaultdict
|
|
58
|
+
|
|
59
|
+
dict_data = defaultdict(list)
|
|
60
|
+
|
|
61
|
+
for item in list_data:
|
|
62
|
+
dict_data[item[key_name]].append(item)
|
|
63
|
+
|
|
64
|
+
# 将 defaultdict 转换成普通字典
|
|
65
|
+
dict_data = dict(dict_data)
|
|
66
|
+
return dict_data
|
{re_common-10.0.16 → re_common-10.0.17}/re_common/v2/baselibrary/utils/BusinessStringUtil.py
RENAMED
|
@@ -177,7 +177,14 @@ def deal_num(num_str):
|
|
|
177
177
|
if num_str.lower().startswith("n "):
|
|
178
178
|
num_str = num_str.lower().replace("n ", "").strip()
|
|
179
179
|
|
|
180
|
+
num_str = num_str.lower().replace("special_issue_", '').replace("_special_issue", '').replace("issue", "")
|
|
181
|
+
num_str = num_str.replace("spec.", "").replace("iss.", "").replace("spl.", "").replace("special.", "").replace(
|
|
182
|
+
"specialissue.", "")
|
|
183
|
+
num_str = num_str.replace("spec", "").replace("iss", "").replace("spl", "").replace("special", "").replace(
|
|
184
|
+
"specialissue", '')
|
|
185
|
+
|
|
180
186
|
num_str = num_str.replace("-", "_").replace(".", "_").upper()
|
|
187
|
+
num_str = num_str.lstrip("_").rstrip("_")
|
|
181
188
|
if num_str.find("_") > -1:
|
|
182
189
|
start, end = num_str.split("_")
|
|
183
190
|
start = deal_num_strs(start)
|
|
@@ -186,4 +193,4 @@ def deal_num(num_str):
|
|
|
186
193
|
else:
|
|
187
194
|
num_str = deal_num_strs(num_str)
|
|
188
195
|
|
|
189
|
-
return num_str
|
|
196
|
+
return num_str.lower().strip()
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
import Levenshtein
|
|
2
|
+
import jellyfish
|
|
3
|
+
from rapidfuzz.distance import DamerauLevenshtein, Hamming, Indel, LCSseq, OSA
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class BaseStringSimilarity(object):
|
|
7
|
+
|
|
8
|
+
@classmethod
|
|
9
|
+
def levenshtein_similarity(cls, str1, str2) -> float:
|
|
10
|
+
"""
|
|
11
|
+
返回 两个字字符串之间的编辑距离 分数
|
|
12
|
+
"""
|
|
13
|
+
# 编辑距离长度
|
|
14
|
+
distance = Levenshtein.distance(str1, str2)
|
|
15
|
+
# 以最长字符串为除数算分
|
|
16
|
+
similarity = 1 - (distance / max(len(str1), len(str2)))
|
|
17
|
+
return similarity
|
|
18
|
+
|
|
19
|
+
@classmethod
|
|
20
|
+
def damerau_normalized_distance_similarity(cls, str1, str2) -> float:
|
|
21
|
+
"""
|
|
22
|
+
# 计算 归一化的编辑距离,取值范围 [0, 1],值越小表示越相似。 一般不以小评估分 所以不用
|
|
23
|
+
similarity = DamerauLevenshtein.normalized_distance(str1, str2)
|
|
24
|
+
作用:计算 相似度得分,取值范围 [0, max_len],值越大表示越相似。
|
|
25
|
+
print(DamerauLevenshtein.similarity(str1, str2))
|
|
26
|
+
"""
|
|
27
|
+
# 该算法与 cls.levenshtein_similarity 算法一致 只是 编辑距离的得值不一样
|
|
28
|
+
similarity = DamerauLevenshtein.normalized_similarity(str1, str2)
|
|
29
|
+
return similarity
|
|
30
|
+
|
|
31
|
+
@classmethod
|
|
32
|
+
def indel_levenshtein_similarity(cls, str1, str2) -> float:
|
|
33
|
+
"""
|
|
34
|
+
本质上使用的 是 Indel.normalized_similarity(str1,str2) 方法
|
|
35
|
+
|
|
36
|
+
计算 str1 和 str2 之间的 Indel 距离(插入和删除操作的最小次数)
|
|
37
|
+
Indel.distance(str1, str2)
|
|
38
|
+
计算 标准化后的 Indel 距离,取值范围在 [0, 1] 之间,其中 0 表示完全相同,1 表示完全不同。 ``distance / (len1 + len2)``.
|
|
39
|
+
Indel.normalized_distance(str1, str2)
|
|
40
|
+
计算 [max, 0] 范围内的 Indel 相似度。计算公式为“(len1 + len2) - distance”
|
|
41
|
+
Indel.similarity(str1, str2)
|
|
42
|
+
计算 [0, 1] 范围内的归一化插入/缺失相似度。计算公式为“1 - normalized_distance”
|
|
43
|
+
Indel.normalized_similarity(str1, str2)
|
|
44
|
+
|
|
45
|
+
"""
|
|
46
|
+
# 计算相似度(0到1之间的值,1表示完全相同)
|
|
47
|
+
similarity = Levenshtein.ratio(str1, str2)
|
|
48
|
+
return similarity
|
|
49
|
+
|
|
50
|
+
@classmethod
|
|
51
|
+
def jaro_similarity(cls, str1, str2) -> float:
|
|
52
|
+
"""
|
|
53
|
+
Jaro 相似度是一种用于测量两个字符串相似度的算法,主要考虑:
|
|
54
|
+
匹配的字符
|
|
55
|
+
字符顺序
|
|
56
|
+
字符转置(位置交换)
|
|
57
|
+
|
|
58
|
+
与 Jaro.normalized_similarity(str1,str2) 一致
|
|
59
|
+
"""
|
|
60
|
+
return jellyfish.jaro_similarity(str1, str2)
|
|
61
|
+
|
|
62
|
+
@classmethod
def jaro_winkler_similarity(cls, str1, str2) -> float:
    """
    Return the Jaro-Winkler similarity of two strings (via ``jellyfish``).

    Jaro-Winkler is a refinement of Jaro that gives more weight to matching
    prefixes.

    Equivalences noted for this metric:

    * the result matches ``JaroWinkler.normalized_similarity(str1, str2)``;
    * ``JaroWinkler.distance(str1, str2)`` matches
      ``JaroWinkler.normalized_distance(str1, str2)``;
    * ``JaroWinkler.similarity(str1, str2)`` matches
      ``JaroWinkler.normalized_similarity(str1, str2)``.

    :param str1: first string to compare
    :param str2: second string to compare
    :return: the value of ``jellyfish.jaro_winkler_similarity``
    """
    score = jellyfish.jaro_winkler_similarity(str1, str2)
    return score
|
|
76
|
+
|
|
77
|
+
@classmethod
def osa_similarity(cls, str1, str2) -> float:
    """
    Return the normalized Optimal String Alignment (OSA) similarity.

    The value lies in [0, 1] and is computed as ``1 - normalized_distance``.

    :param str1: first string to compare
    :param str2: second string to compare
    :return: the value of ``OSA.normalized_similarity``
    """
    score = OSA.normalized_similarity(str1, str2)
    return score
|
|
85
|
+
|
|
86
|
+
@classmethod
def lcs_seq_similarity(cls, str1, str2) -> float:
    """
    Return the normalized longest-common-subsequence (LCS) similarity.

    The value lies in [0, 1] and is computed as ``1 - normalized_distance``.

    :param str1: first string to compare
    :param str2: second string to compare
    :return: the value of ``LCSseq.normalized_similarity``
    """
    score = LCSseq.normalized_similarity(str1, str2)
    return score
|
|
93
|
+
|
|
94
|
+
@classmethod
def lcs_seq_distance(cls, str1, str2) -> int:
    """
    Return the longest-common-subsequence (LCS) distance of two strings.

    ``LCSseq.distance`` is a RapidFuzz method. The LCS is the longest
    sequence of characters that appears in both strings in the same order,
    though not necessarily contiguously. For example:

    * "abcde" and "ace" have the LCS "ace" (length 3);
    * "Druitt, Robert" and "Druitt R." may have the LCS "Druitt R"
      (length 8).

    The distance lies in [0, max] and is computed as
    ``max(len1, len2) - similarity``.

    :param str1: first string to compare
    :param str2: second string to compare
    :return: the value of ``LCSseq.distance``
    """
    gap = LCSseq.distance(str1, str2)
    return gap
|
|
105
|
+
|
|
106
|
+
@classmethod
def osa_distance(cls, str1, str2) -> int:
    """
    Return the Optimal String Alignment (OSA) distance of two strings.

    ``OSA.distance`` is a RapidFuzz method that computes an edit distance
    between two strings. Compared with the standard Levenshtein distance it
    additionally allows transposition of adjacent characters, but it is more
    restricted than Damerau-Levenshtein: per the original author's note,
    Damerau-Levenshtein allows repeated swaps whereas OSA allows only one.

    :param str1: first string to compare
    :param str2: second string to compare
    :return: the value of ``OSA.distance``
    """
    edits = OSA.distance(str1, str2)
    return edits
|
|
114
|
+
|
|
115
|
+
@classmethod
def levenshtein_distance(cls, str1, str2) -> int:
    """
    Return the Levenshtein edit distance between two strings.

    The standard Levenshtein distance allows three operations (insertion,
    deletion and substitution) but does not allow transposition of adjacent
    characters.

    ``jellyfish.levenshtein_distance(str1, str2)`` gives the same result as
    this method.

    Side note kept from the original docstring (it concerns Jaro, not
    Levenshtein): ``Jaro.distance`` matches ``Jaro.normalized_distance`` and
    ``Jaro.similarity`` matches ``Jaro.normalized_similarity``.

    :param str1: first string to compare
    :param str2: second string to compare
    :return: the value of ``Levenshtein.distance``
    """
    # The leftover debug print of jellyfish.levenshtein_distance was removed:
    # it computed the distance a second time and wrote to stdout on each call.
    return Levenshtein.distance(str1, str2)
|
|
133
|
+
|
|
134
|
+
@classmethod
def indel_distance(cls, str1, str2) -> int:
    """
    Return the Indel (insertion + deletion) distance of two strings.

    The Indel distance is an edit distance that counts only insertions and
    deletions; substitutions are not considered.

    :param str1: first string to compare
    :param str2: second string to compare
    :return: the value of ``Indel.distance``
    """
    edits = Indel.distance(str1, str2)
    return edits
|
|
140
|
+
|
|
141
|
+
@classmethod
def damerau_levenshtein_distance(cls, str1, str2) -> int:
    """
    Return the Damerau-Levenshtein edit distance between two strings.

    Damerau-Levenshtein is a modification of the Levenshtein distance that
    counts a transposition (for example turning "ifsh" into "fish") as a
    single edit.

    :param str1: first string to compare
    :param str2: second string to compare
    :return: the value of ``jellyfish.damerau_levenshtein_distance``
    """
    # The leftover debug print of DamerauLevenshtein.distance was removed:
    # it did a redundant second computation and wrote to stdout on each call.
    return jellyfish.damerau_levenshtein_distance(str1, str2)
|
|
150
|
+
|
|
151
|
+
@classmethod
def hamming_distance(cls, str1, str2) -> int:
    """
    Return ``Hamming.distance(str1, str2)`` for two strings.

    NOTE(review): presumably this is RapidFuzz's Hamming metric (the number
    of positions at which the strings differ); how inputs of unequal length
    are treated depends on the installed library version -- confirm.

    :param str1: first string to compare
    :param str2: second string to compare
    :return: the value of ``Hamming.distance``
    """
    mismatches = Hamming.distance(str1, str2)
    return mismatches
|
|
154
|
+
|
|
155
|
+
# str1 = "primulina elegant ladyis a new culitvar developed by crossing seed parent primulina medica and pollen parent primulina longii it has fresh and elegant flowershigh ornamental value and strong shade tolerance it is easy to cultivate and propagate"
|
|
156
|
+
# str2 = "primulinaelegant labyis a new cultivar developed by crossing seed parent primulina medica and pollen parent primulina longii it has fresh and elegant flowershigh ornamental value and strong shade tolerance it is easy to cultivate and propagate 2019 editorial office of acta horticulturae sinica all rights reserved"
|
|
157
|
+
# # str1 = "primulina elegant ladyis a new cultivar developed by crossing seed parent primulina medica and pollen parent primulina longii it has fresh and elegant flowershigh ornamental value and strong shade tolerance it is easy to cultivate and propagate"
|
|
158
|
+
# # str2 = "primulinaelegant ladyis a new cultivar developed by crossing seed parent primulina medica and pollen parent primulina longii it has fresh and elegant flowershigh ornamental value and strong shade tolerance it is easy to cultivate and propagate 2019 editorial office of acta horticulturae sinica all rights reserved"
|
|
@@ -6,6 +6,11 @@ import regex
|
|
|
6
6
|
from re_common.v2.baselibrary.utils.stringutils import qj2bj, bj2qj, get_diacritic_variant, clean_html, \
|
|
7
7
|
remove_spaces_between_chinese_characters
|
|
8
8
|
|
|
9
|
+
from opencc import OpenCC
|
|
10
|
+
|
|
11
|
+
# pip install opencc-python-reimplemented
|
|
12
|
+
cc = OpenCC("t2s")  # "t2s" converts Traditional Chinese to Simplified Chinese
|
|
13
|
+
|
|
9
14
|
|
|
10
15
|
class StringClear(object):
|
|
11
16
|
|
|
@@ -31,6 +36,11 @@ class StringClear(object):
|
|
|
31
36
|
self.obj_str = bj2qj(self.obj_str)
|
|
32
37
|
return self
|
|
33
38
|
|
|
39
|
+
def convert_to_simplified(self):
    """
    Convert ``self.obj_str`` from Traditional to Simplified Chinese.

    The conversion is delegated to the module-level ``cc`` converter and the
    result is stored back on ``self.obj_str``.

    :return: ``self``, so that calls can be chained
    """
    simplified = cc.convert(self.obj_str)
    self.obj_str = simplified
    return self
|
|
43
|
+
|
|
34
44
|
def lower(self):
    """
    Lower-case ``self.obj_str`` in place.

    :return: ``self``, so that calls can be chained
    """
    lowered = self.obj_str.lower()
    self.obj_str = lowered
    return self
|
|
@@ -188,6 +188,7 @@ re_common/v2/baselibrary/tools/unionfind_tools.py
|
|
|
188
188
|
re_common/v2/baselibrary/utils/BusinessStringUtil.py
|
|
189
189
|
re_common/v2/baselibrary/utils/__init__.py
|
|
190
190
|
re_common/v2/baselibrary/utils/author_smi.py
|
|
191
|
+
re_common/v2/baselibrary/utils/base_string_similarity.py
|
|
191
192
|
re_common/v2/baselibrary/utils/basedict.py
|
|
192
193
|
re_common/v2/baselibrary/utils/basehdfs.py
|
|
193
194
|
re_common/v2/baselibrary/utils/basepika.py
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/all_requests/__init__.py
RENAMED
|
File without changes
|
{re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/all_requests/aiohttp_request.py
RENAMED
|
File without changes
|
{re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/all_requests/httpx_requet.py
RENAMED
|
File without changes
|
{re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/all_requests/mrequest.py
RENAMED
|
File without changes
|
{re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/all_requests/requests_request.py
RENAMED
|
File without changes
|
{re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/batch_compre/__init__.py
RENAMED
|
File without changes
|
{re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/batch_compre/bijiao_batch.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py
RENAMED
|
File without changes
|
|
File without changes
|
{re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/move_mongo/move_mongo_table.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/mpandas/mpandasreadexcel.py
RENAMED
|
File without changes
|
{re_common-10.0.16 → re_common-10.0.17}/re_common/baselibrary/tools/mpandas/pandas_visualization.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|