re-common 10.0.15__tar.gz → 10.0.17__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231)
  1. {re_common-10.0.15 → re_common-10.0.17}/PKG-INFO +1 -1
  2. {re_common-10.0.15 → re_common-10.0.17}/re_common/v2/baselibrary/tools/hdfs_data_processer.py +41 -20
  3. re_common-10.0.17/re_common/v2/baselibrary/tools/list_tools.py +66 -0
  4. {re_common-10.0.15 → re_common-10.0.17}/re_common/v2/baselibrary/utils/BusinessStringUtil.py +8 -1
  5. re_common-10.0.17/re_common/v2/baselibrary/utils/base_string_similarity.py +158 -0
  6. {re_common-10.0.15 → re_common-10.0.17}/re_common/v2/baselibrary/utils/db.py +1 -1
  7. {re_common-10.0.15 → re_common-10.0.17}/re_common/v2/baselibrary/utils/string_bool.py +48 -1
  8. {re_common-10.0.15 → re_common-10.0.17}/re_common/v2/baselibrary/utils/string_clear.py +10 -0
  9. {re_common-10.0.15 → re_common-10.0.17}/re_common.egg-info/PKG-INFO +1 -1
  10. {re_common-10.0.15 → re_common-10.0.17}/re_common.egg-info/SOURCES.txt +1 -0
  11. {re_common-10.0.15 → re_common-10.0.17}/setup.py +1 -1
  12. re_common-10.0.15/re_common/v2/baselibrary/tools/list_tools.py +0 -9
  13. {re_common-10.0.15 → re_common-10.0.17}/LICENSE +0 -0
  14. {re_common-10.0.15 → re_common-10.0.17}/README.md +0 -0
  15. {re_common-10.0.15 → re_common-10.0.17}/re_common/__init__.py +0 -0
  16. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/__init__.py +0 -0
  17. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/baseabs/__init__.py +0 -0
  18. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/baseabs/baseabs.py +0 -0
  19. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/database/__init__.py +0 -0
  20. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/database/mbuilder.py +0 -0
  21. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/database/moudle.py +0 -0
  22. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/database/msqlite3.py +0 -0
  23. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/database/mysql.py +0 -0
  24. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/database/sql_factory.py +0 -0
  25. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/mthread/MThreadingRun.py +0 -0
  26. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/mthread/MThreadingRunEvent.py +0 -0
  27. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/mthread/__init__.py +0 -0
  28. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/mthread/mythreading.py +0 -0
  29. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/pakge_other/__init__.py +0 -0
  30. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/pakge_other/socks.py +0 -0
  31. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/readconfig/__init__.py +0 -0
  32. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/readconfig/config_factory.py +0 -0
  33. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/readconfig/ini_config.py +0 -0
  34. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/readconfig/toml_config.py +0 -0
  35. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/temporary/__init__.py +0 -0
  36. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/temporary/envdata.py +0 -0
  37. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/__init__.py +0 -0
  38. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/all_requests/__init__.py +0 -0
  39. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/all_requests/aiohttp_request.py +0 -0
  40. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/all_requests/httpx_requet.py +0 -0
  41. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/all_requests/mrequest.py +0 -0
  42. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/all_requests/requests_request.py +0 -0
  43. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/batch_compre/__init__.py +0 -0
  44. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/batch_compre/bijiao_batch.py +0 -0
  45. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/contrast_db3.py +0 -0
  46. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/copy_file.py +0 -0
  47. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/db3_2_sizedb3.py +0 -0
  48. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/foreachgz.py +0 -0
  49. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/get_attr.py +0 -0
  50. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/image_to_pdf.py +0 -0
  51. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/java_code_deal.py +0 -0
  52. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/javacode.py +0 -0
  53. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/mdb_db3.py +0 -0
  54. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/merge_file.py +0 -0
  55. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/merge_gz_file.py +0 -0
  56. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/mhdfstools/__init__.py +0 -0
  57. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +0 -0
  58. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/mhdfstools/hdfst.py +0 -0
  59. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +0 -0
  60. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/mongo_tools.py +0 -0
  61. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/move_file.py +0 -0
  62. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/move_mongo/__init__.py +0 -0
  63. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +0 -0
  64. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/move_mongo/move_mongo_table.py +0 -0
  65. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/move_mongo/use_mttf.py +0 -0
  66. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/move_mongo/use_mv.py +0 -0
  67. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/mpandas/__init__.py +0 -0
  68. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +0 -0
  69. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/mpandas/pandas_visualization.py +0 -0
  70. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/myparsel.py +0 -0
  71. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/rename_dir_file.py +0 -0
  72. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/sequoiadb_utils.py +0 -0
  73. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/split_line_to_many.py +0 -0
  74. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/stringtodicts.py +0 -0
  75. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/tools/workwechant_bot.py +0 -0
  76. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/__init__.py +0 -0
  77. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/baseaiohttp.py +0 -0
  78. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/baseaiomysql.py +0 -0
  79. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/baseallstep.py +0 -0
  80. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/baseavro.py +0 -0
  81. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/baseboto3.py +0 -0
  82. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/basecsv.py +0 -0
  83. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/basedict.py +0 -0
  84. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/basedir.py +0 -0
  85. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/baseencode.py +0 -0
  86. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/baseencoding.py +0 -0
  87. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/baseesdsl.py +0 -0
  88. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/baseexcel.py +0 -0
  89. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/baseexcept.py +0 -0
  90. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/basefile.py +0 -0
  91. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/baseftp.py +0 -0
  92. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/basegzip.py +0 -0
  93. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/basehdfs.py +0 -0
  94. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/basehttpx.py +0 -0
  95. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/baseip.py +0 -0
  96. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/basejson.py +0 -0
  97. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/baselist.py +0 -0
  98. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/basemotor.py +0 -0
  99. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/basemssql.py +0 -0
  100. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/baseodbc.py +0 -0
  101. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/basepandas.py +0 -0
  102. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/basepeewee.py +0 -0
  103. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/basepika.py +0 -0
  104. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/basepydash.py +0 -0
  105. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/basepymongo.py +0 -0
  106. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/basequeue.py +0 -0
  107. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/baserar.py +0 -0
  108. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/baserequest.py +0 -0
  109. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/baseset.py +0 -0
  110. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/basesmb.py +0 -0
  111. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/basestring.py +0 -0
  112. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/basetime.py +0 -0
  113. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/basetuple.py +0 -0
  114. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/baseurl.py +0 -0
  115. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/basezip.py +0 -0
  116. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/core/__init__.py +0 -0
  117. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/core/bottomutils.py +0 -0
  118. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/core/mdeprecated.py +0 -0
  119. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/core/mlamada.py +0 -0
  120. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/core/msginfo.py +0 -0
  121. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/core/requests_core.py +0 -0
  122. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/fateadm.py +0 -0
  123. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/importfun.py +0 -0
  124. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/mfaker.py +0 -0
  125. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/my_abc/__init__.py +0 -0
  126. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/my_abc/better_abc.py +0 -0
  127. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/mylogger.py +0 -0
  128. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/myredisclient.py +0 -0
  129. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/pipupgrade.py +0 -0
  130. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/ringlist.py +0 -0
  131. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/version_compare.py +0 -0
  132. {re_common-10.0.15 → re_common-10.0.17}/re_common/baselibrary/utils/ydmhttp.py +0 -0
  133. {re_common-10.0.15 → re_common-10.0.17}/re_common/facade/__init__.py +0 -0
  134. {re_common-10.0.15 → re_common-10.0.17}/re_common/facade/lazy_import.py +0 -0
  135. {re_common-10.0.15 → re_common-10.0.17}/re_common/facade/loggerfacade.py +0 -0
  136. {re_common-10.0.15 → re_common-10.0.17}/re_common/facade/mysqlfacade.py +0 -0
  137. {re_common-10.0.15 → re_common-10.0.17}/re_common/facade/now.py +0 -0
  138. {re_common-10.0.15 → re_common-10.0.17}/re_common/facade/sqlite3facade.py +0 -0
  139. {re_common-10.0.15 → re_common-10.0.17}/re_common/facade/use/__init__.py +0 -0
  140. {re_common-10.0.15 → re_common-10.0.17}/re_common/facade/use/mq_use_facade.py +0 -0
  141. {re_common-10.0.15 → re_common-10.0.17}/re_common/facade/use/proxy_use_facade.py +0 -0
  142. {re_common-10.0.15 → re_common-10.0.17}/re_common/libtest/__init__.py +0 -0
  143. {re_common-10.0.15 → re_common-10.0.17}/re_common/libtest/base_dict_test.py +0 -0
  144. {re_common-10.0.15 → re_common-10.0.17}/re_common/libtest/baseavro_test.py +0 -0
  145. {re_common-10.0.15 → re_common-10.0.17}/re_common/libtest/basefile_test.py +0 -0
  146. {re_common-10.0.15 → re_common-10.0.17}/re_common/libtest/basemssql_test.py +0 -0
  147. {re_common-10.0.15 → re_common-10.0.17}/re_common/libtest/baseodbc_test.py +0 -0
  148. {re_common-10.0.15 → re_common-10.0.17}/re_common/libtest/basepandas_test.py +0 -0
  149. {re_common-10.0.15 → re_common-10.0.17}/re_common/libtest/get_attr_test/__init__.py +0 -0
  150. {re_common-10.0.15 → re_common-10.0.17}/re_common/libtest/get_attr_test/get_attr_test_settings.py +0 -0
  151. {re_common-10.0.15 → re_common-10.0.17}/re_common/libtest/get_attr_test/settings.py +0 -0
  152. {re_common-10.0.15 → re_common-10.0.17}/re_common/libtest/idencode_test.py +0 -0
  153. {re_common-10.0.15 → re_common-10.0.17}/re_common/libtest/iniconfig_test.py +0 -0
  154. {re_common-10.0.15 → re_common-10.0.17}/re_common/libtest/ip_test.py +0 -0
  155. {re_common-10.0.15 → re_common-10.0.17}/re_common/libtest/merge_file_test.py +0 -0
  156. {re_common-10.0.15 → re_common-10.0.17}/re_common/libtest/mfaker_test.py +0 -0
  157. {re_common-10.0.15 → re_common-10.0.17}/re_common/libtest/mm3_test.py +0 -0
  158. {re_common-10.0.15 → re_common-10.0.17}/re_common/libtest/mylogger_test.py +0 -0
  159. {re_common-10.0.15 → re_common-10.0.17}/re_common/libtest/myparsel_test.py +0 -0
  160. {re_common-10.0.15 → re_common-10.0.17}/re_common/libtest/mysql_test.py +0 -0
  161. {re_common-10.0.15 → re_common-10.0.17}/re_common/libtest/pymongo_test.py +0 -0
  162. {re_common-10.0.15 → re_common-10.0.17}/re_common/libtest/split_test.py +0 -0
  163. {re_common-10.0.15 → re_common-10.0.17}/re_common/libtest/sqlite3_merge_test.py +0 -0
  164. {re_common-10.0.15 → re_common-10.0.17}/re_common/libtest/sqlite3_test.py +0 -0
  165. {re_common-10.0.15 → re_common-10.0.17}/re_common/libtest/tomlconfig_test.py +0 -0
  166. {re_common-10.0.15 → re_common-10.0.17}/re_common/libtest/use_tools_test/__init__.py +0 -0
  167. {re_common-10.0.15 → re_common-10.0.17}/re_common/libtest/user/__init__.py +0 -0
  168. {re_common-10.0.15 → re_common-10.0.17}/re_common/studio/__init__.py +0 -0
  169. {re_common-10.0.15 → re_common-10.0.17}/re_common/studio/assignment_expressions.py +0 -0
  170. {re_common-10.0.15 → re_common-10.0.17}/re_common/studio/mydash/__init__.py +0 -0
  171. {re_common-10.0.15 → re_common-10.0.17}/re_common/studio/mydash/test1.py +0 -0
  172. {re_common-10.0.15 → re_common-10.0.17}/re_common/studio/pydashstudio/__init__.py +0 -0
  173. {re_common-10.0.15 → re_common-10.0.17}/re_common/studio/pydashstudio/first.py +0 -0
  174. {re_common-10.0.15 → re_common-10.0.17}/re_common/studio/streamlitstudio/__init__.py +0 -0
  175. {re_common-10.0.15 → re_common-10.0.17}/re_common/studio/streamlitstudio/first_app.py +0 -0
  176. {re_common-10.0.15 → re_common-10.0.17}/re_common/studio/streamlitstudio/uber_pickups.py +0 -0
  177. {re_common-10.0.15 → re_common-10.0.17}/re_common/studio/test.py +0 -0
  178. {re_common-10.0.15 → re_common-10.0.17}/re_common/v2/__init__.py +0 -0
  179. {re_common-10.0.15 → re_common-10.0.17}/re_common/v2/baselibrary/__init__.py +0 -0
  180. {re_common-10.0.15 → re_common-10.0.17}/re_common/v2/baselibrary/decorators/__init__.py +0 -0
  181. {re_common-10.0.15 → re_common-10.0.17}/re_common/v2/baselibrary/decorators/utils.py +0 -0
  182. {re_common-10.0.15 → re_common-10.0.17}/re_common/v2/baselibrary/helpers/__init__.py +0 -0
  183. {re_common-10.0.15 → re_common-10.0.17}/re_common/v2/baselibrary/s3object/__init__.py +0 -0
  184. {re_common-10.0.15 → re_common-10.0.17}/re_common/v2/baselibrary/s3object/baseboto3.py +0 -0
  185. {re_common-10.0.15 → re_common-10.0.17}/re_common/v2/baselibrary/tools/WeChatRobot.py +0 -0
  186. {re_common-10.0.15 → re_common-10.0.17}/re_common/v2/baselibrary/tools/__init__.py +0 -0
  187. {re_common-10.0.15 → re_common-10.0.17}/re_common/v2/baselibrary/tools/ac_ahocorasick.py +0 -0
  188. {re_common-10.0.15 → re_common-10.0.17}/re_common/v2/baselibrary/tools/dict_tools.py +0 -0
  189. {re_common-10.0.15 → re_common-10.0.17}/re_common/v2/baselibrary/tools/dolphinscheduler.py +0 -0
  190. {re_common-10.0.15 → re_common-10.0.17}/re_common/v2/baselibrary/tools/search_hash_tools.py +0 -0
  191. {re_common-10.0.15 → re_common-10.0.17}/re_common/v2/baselibrary/tools/text_matcher.py +0 -0
  192. {re_common-10.0.15 → re_common-10.0.17}/re_common/v2/baselibrary/tools/unionfind_tools.py +0 -0
  193. {re_common-10.0.15 → re_common-10.0.17}/re_common/v2/baselibrary/utils/__init__.py +0 -0
  194. {re_common-10.0.15 → re_common-10.0.17}/re_common/v2/baselibrary/utils/author_smi.py +0 -0
  195. {re_common-10.0.15 → re_common-10.0.17}/re_common/v2/baselibrary/utils/basedict.py +0 -0
  196. {re_common-10.0.15 → re_common-10.0.17}/re_common/v2/baselibrary/utils/basehdfs.py +0 -0
  197. {re_common-10.0.15 → re_common-10.0.17}/re_common/v2/baselibrary/utils/basepika.py +0 -0
  198. {re_common-10.0.15 → re_common-10.0.17}/re_common/v2/baselibrary/utils/json_cls.py +0 -0
  199. {re_common-10.0.15 → re_common-10.0.17}/re_common/v2/baselibrary/utils/mq.py +0 -0
  200. {re_common-10.0.15 → re_common-10.0.17}/re_common/v2/baselibrary/utils/n_ary_expression_tree.py +0 -0
  201. {re_common-10.0.15 → re_common-10.0.17}/re_common/v2/baselibrary/utils/string_smi.py +0 -0
  202. {re_common-10.0.15 → re_common-10.0.17}/re_common/v2/baselibrary/utils/stringutils.py +0 -0
  203. {re_common-10.0.15 → re_common-10.0.17}/re_common/vip/__init__.py +0 -0
  204. {re_common-10.0.15 → re_common-10.0.17}/re_common/vip/base_step_process.py +0 -0
  205. {re_common-10.0.15 → re_common-10.0.17}/re_common/vip/baseencodeid.py +0 -0
  206. {re_common-10.0.15 → re_common-10.0.17}/re_common/vip/changetaskname.py +0 -0
  207. {re_common-10.0.15 → re_common-10.0.17}/re_common/vip/core_var.py +0 -0
  208. {re_common-10.0.15 → re_common-10.0.17}/re_common/vip/mmh3Hash.py +0 -0
  209. {re_common-10.0.15 → re_common-10.0.17}/re_common/vip/proxy/__init__.py +0 -0
  210. {re_common-10.0.15 → re_common-10.0.17}/re_common/vip/proxy/allproxys.py +0 -0
  211. {re_common-10.0.15 → re_common-10.0.17}/re_common/vip/proxy/allproxys_thread.py +0 -0
  212. {re_common-10.0.15 → re_common-10.0.17}/re_common/vip/proxy/cnki_proxy.py +0 -0
  213. {re_common-10.0.15 → re_common-10.0.17}/re_common/vip/proxy/kuaidaili.py +0 -0
  214. {re_common-10.0.15 → re_common-10.0.17}/re_common/vip/proxy/proxy_all.py +0 -0
  215. {re_common-10.0.15 → re_common-10.0.17}/re_common/vip/proxy/update_kuaidaili_0.py +0 -0
  216. {re_common-10.0.15 → re_common-10.0.17}/re_common/vip/proxy/wanfang_proxy.py +0 -0
  217. {re_common-10.0.15 → re_common-10.0.17}/re_common/vip/proxy/wp_proxy_all.py +0 -0
  218. {re_common-10.0.15 → re_common-10.0.17}/re_common/vip/read_rawid_to_txt.py +0 -0
  219. {re_common-10.0.15 → re_common-10.0.17}/re_common/vip/title/__init__.py +0 -0
  220. {re_common-10.0.15 → re_common-10.0.17}/re_common/vip/title/transform/TransformBookTitleToZt.py +0 -0
  221. {re_common-10.0.15 → re_common-10.0.17}/re_common/vip/title/transform/TransformConferenceTitleToZt.py +0 -0
  222. {re_common-10.0.15 → re_common-10.0.17}/re_common/vip/title/transform/TransformCstadTitleToZt.py +0 -0
  223. {re_common-10.0.15 → re_common-10.0.17}/re_common/vip/title/transform/TransformJournalTitleToZt.py +0 -0
  224. {re_common-10.0.15 → re_common-10.0.17}/re_common/vip/title/transform/TransformPatentTitleToZt.py +0 -0
  225. {re_common-10.0.15 → re_common-10.0.17}/re_common/vip/title/transform/TransformRegulationTitleToZt.py +0 -0
  226. {re_common-10.0.15 → re_common-10.0.17}/re_common/vip/title/transform/TransformStandardTitleToZt.py +0 -0
  227. {re_common-10.0.15 → re_common-10.0.17}/re_common/vip/title/transform/TransformThesisTitleToZt.py +0 -0
  228. {re_common-10.0.15 → re_common-10.0.17}/re_common/vip/title/transform/__init__.py +0 -0
  229. {re_common-10.0.15 → re_common-10.0.17}/re_common.egg-info/dependency_links.txt +0 -0
  230. {re_common-10.0.15 → re_common-10.0.17}/re_common.egg-info/top_level.txt +0 -0
  231. {re_common-10.0.15 → re_common-10.0.17}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: re_common
3
- Version: 10.0.15
3
+ Version: 10.0.17
4
4
  Summary: a library about all python projects
5
5
  Home-page: https://gitee.com/xujiangios/re-common
6
6
  Author: vic
@@ -1,6 +1,7 @@
1
1
  import asyncio
2
2
  import gzip
3
3
  import json
4
+ from pathlib import Path
4
5
  import sqlite3
5
6
  import time
6
7
  import os
@@ -12,12 +13,12 @@ from hdfs import InsecureClient
12
13
 
13
14
  class HDFSDataProcessor:
14
15
  def __init__(
15
- self,
16
- hdfs_url="http://VIP-DC-MASTER-2:9870",
17
- hdfs_user="root",
18
- db_file="processed_files.db",
19
- batch_size=50,
20
- retry_limit=3,
16
+ self,
17
+ hdfs_url="http://VIP-DC-MASTER-2:9870",
18
+ hdfs_user="root",
19
+ db_file="processed_files.db",
20
+ batch_size=50,
21
+ retry_limit=3,
21
22
  ):
22
23
  self.hdfs_url = hdfs_url
23
24
  self.hdfs_user = hdfs_user
@@ -44,14 +45,20 @@ class HDFSDataProcessor:
44
45
  """保存处理过的文件"""
45
46
  with sqlite3.connect(self.db_file) as conn:
46
47
  cursor = conn.cursor()
47
- cursor.execute("INSERT OR IGNORE INTO processed_files (file_path) VALUES (?)", (file_path,))
48
+ cursor.execute(
49
+ "INSERT OR IGNORE INTO processed_files (file_path) VALUES (?)",
50
+ (file_path,),
51
+ )
48
52
  conn.commit()
49
53
 
50
54
  def is_file_processed(self, file_path):
51
55
  """检查文件是否已处理"""
52
56
  with sqlite3.connect(self.db_file) as conn:
53
57
  cursor = conn.cursor()
54
- cursor.execute("SELECT file_path FROM processed_files WHERE file_path = ?", (file_path,))
58
+ cursor.execute(
59
+ "SELECT file_path FROM processed_files WHERE file_path = ?",
60
+ (file_path,),
61
+ )
55
62
  result = cursor.fetchone()
56
63
  return result is not None
57
64
 
@@ -81,7 +88,7 @@ class HDFSDataProcessor:
81
88
  break
82
89
  yield lines
83
90
 
84
- def all_read_gz(self, gz_file_path: str, encoding='utf-8'):
91
+ def all_read_gz(self, gz_file_path: str, encoding="utf-8"):
85
92
  """
86
93
  读取 HDFS 上的 .gz 文件内容。
87
94
  :param hdfs_path: HDFS 文件路径(必须以 .gz 结尾)
@@ -93,8 +100,8 @@ class HDFSDataProcessor:
93
100
  with gzip.GzipFile(fileobj=BytesIO(compressed_data)) as gz_file: # 解压缩
94
101
  content = gz_file.read().decode(encoding) # 解码为字符串
95
102
  print(f"文件读取成功: {gz_file_path}")
96
- lines = [i for i in content.splitlines() if i.strip()]
97
- result = [lines[i:i + self.batch_size] for i in range(0, len(lines), self.batch_size)]
103
+ lines = [i for i in content.split("\n") if i.strip()]
104
+ result = [lines[i : i + self.batch_size] for i in range(0, len(lines), self.batch_size)]
98
105
  return result
99
106
 
100
107
  async def process_data(self, data, process_func):
@@ -107,14 +114,15 @@ class HDFSDataProcessor:
107
114
  except Exception as e:
108
115
  retry_count += 1
109
116
  print(f"处理数据时发生错误: {e}, 正在重试 {retry_count}/{self.retry_limit}, data: {data}")
110
- await asyncio.sleep(2 ** retry_count)
117
+ await asyncio.sleep(2**retry_count)
111
118
  print(f"处理数据失败, 达到重试上限, data: {data}")
112
119
 
113
- async def process_file(self, hdfs_file_path, process_func):
120
+ async def process_file(self, hdfs_file_path, process_func, write_dir: str):
114
121
  """处理单个 gz 文件"""
115
122
  total_lines = self.count_total_lines(hdfs_file_path)
116
123
  processed_lines = 0
117
124
  start_time = time.time()
125
+ results = []
118
126
  # # 这里根据不同的配置选用不同的读取文件的方法
119
127
  for lines in self.read_hdfs_fanc[self.read_hdfs_model](hdfs_file_path):
120
128
  processing_start_time = time.time() # 记录本批处理开始时间
@@ -128,7 +136,7 @@ class HDFSDataProcessor:
128
136
  print(f"解析JSON失败: {e}, 行内容: {line.strip()}")
129
137
 
130
138
  # await AsyncTaskPool(self.batch_size).run(tasks) # AsyncTaskPool 适用于一次提交所有任务, 限制并发数执行
131
- await asyncio.gather(*tasks)
139
+ results.extend(await asyncio.gather(*tasks))
132
140
 
133
141
  processed_lines += len(lines)
134
142
 
@@ -152,6 +160,18 @@ class HDFSDataProcessor:
152
160
  f"预估剩余时间: {remaining_time:.2f}秒 | 平均每条处理时间: {avg_processing_time:.2f}毫秒"
153
161
  )
154
162
 
163
+ def generate_write_data(results):
164
+ for res in results:
165
+ yield str(res) + "\n"
166
+
167
+ if write_dir is not None:
168
+ self.client.write(
169
+ write_dir.rstrip("/") + f"/{Path(hdfs_file_path).stem}",
170
+ data=generate_write_data(results),
171
+ overwrite=True,
172
+ encoding="utf-8",
173
+ )
174
+
155
175
  # 最终进度显示
156
176
  final_elapsed_time = time.time() - start_time # 最终已用时间
157
177
  print(
@@ -164,22 +184,22 @@ class HDFSDataProcessor:
164
184
 
165
185
  self.save_processed_file(hdfs_file_path) # 保存处理过的文件
166
186
 
167
- async def retry_process_file(self, hdfs_file_path, process_func):
187
+ async def retry_process_file(self, hdfs_file_path, process_func, write_dir):
168
188
  """带重试机制的文件处理"""
169
189
  retry_count = 0
170
190
  while retry_count < self.retry_limit:
171
191
  try:
172
- await self.process_file(hdfs_file_path, process_func)
192
+ await self.process_file(hdfs_file_path, process_func, write_dir)
173
193
  return True # 成功处理后退出
174
194
  except Exception as e:
175
195
  retry_count += 1
176
196
  print(f"处理文件 {hdfs_file_path} 时发生错误: {e},正在重试 {retry_count}/{self.retry_limit}")
177
- await asyncio.sleep(2 ** retry_count)
197
+ await asyncio.sleep(2**retry_count)
178
198
  print(f"处理文件 {hdfs_file_path} 失败,达到重试上限")
179
199
  return False
180
200
  # raise
181
201
 
182
- async def batch_process_file(self, hdfs_dir: str, process_func: Callable[[dict], Any]):
202
+ async def batch_process_file(self, hdfs_dir: str, process_func: Callable[[dict], Any], write_dir: str = None):
183
203
  """批量更新所有 gz 文件"""
184
204
  gz_files = self.list_gz_files(hdfs_dir)
185
205
  all_succeed = True
@@ -187,7 +207,7 @@ class HDFSDataProcessor:
187
207
  if self.is_file_processed(hdfs_file_path):
188
208
  print(f"跳过已处理文件: {hdfs_file_path}")
189
209
  continue # 如果文件已处理,跳过
190
- succeed = await self.retry_process_file(hdfs_file_path, process_func) # 处理文件
210
+ succeed = await self.retry_process_file(hdfs_file_path, process_func, write_dir) # 处理文件
191
211
  if succeed is False:
192
212
  all_succeed = False
193
213
 
@@ -276,7 +296,7 @@ class HDFSDataProcessor:
276
296
  except Exception as e:
277
297
  retry_count += 1
278
298
  print(f"处理文件 {hdfs_file_path} 时发生错误: {e},正在重试 {retry_count}/{self.retry_limit}")
279
- await asyncio.sleep(2 ** retry_count)
299
+ await asyncio.sleep(2**retry_count)
280
300
  print(f"处理文件 {hdfs_file_path} 失败,达到重试上限")
281
301
  return False
282
302
 
@@ -301,6 +321,7 @@ class HDFSDataProcessor:
301
321
  except Exception as e:
302
322
  print(f"删除断点重试文件失败: {e}")
303
323
 
324
+
304
325
  # # 使用示例
305
326
  # async def update_refer(data: dict):
306
327
  # ref_id = data["ref_id"]
@@ -0,0 +1,66 @@
1
+ import itertools
2
+ from typing import List, Any, Tuple
3
+
4
+
5
def check_no_duplicates_2d(lst_2d):
    """Check that no row of a two-dimensional list repeats a value.

    Args:
        lst_2d: Iterable of rows. Each row is converted to a ``set`` to
            detect repeats, so its items must be hashable.

    Returns:
        ``True`` when every row is free of duplicates (also when there are
        no rows at all); ``False`` as soon as one row repeats a value.
    """
    # A row contains a duplicate exactly when de-duplicating it shrinks it.
    return all(len(row) == len(set(row)) for row in lst_2d)
16
+
17
+
18
def generate_cross_list_combinations(lists: List[List[Any]]) -> List[Tuple[Any, Any]]:
    """Build every pairwise combination of elements drawn from different lists.

    Args:
        lists: A list of lists, e.g. ``[[1, 2], ['a', 'b'], ['x', 'y']]``.

    Returns:
        A list of 2-tuples. Each tuple pairs one element of an earlier list
        with one element of a later list, e.g.
        ``[(1, 'a'), (1, 'b'), (2, 'a'), ..., ('a', 'x'), ('a', 'y'), ...]``.
        Elements of the same list are never paired with each other.
    """
    combinations = []
    # combinations(lists, 2) yields each (earlier, later) pair of lists once,
    # in the same order as the nested index loops i < j.
    for earlier, later in itertools.combinations(lists, 2):
        combinations.extend(itertools.product(earlier, later))
    return combinations
34
+
35
+
36
def filter_and_sort_by_smi(all_list, top_n=1000):
    """Deduplicate ``(smi, doc_id)`` pairs and return the highest-scoring ones.

    Each item of ``all_list`` is unpacked as a pair: the first element is the
    value compared on, the second is the actual data, which also serves as
    the de-duplication key.

    Returns a list of ``(smi, doc_id)`` tuples, one per distinct ``doc_id``
    (the one with the largest ``smi``), sorted by ``smi`` descending and cut
    to the first ``top_n`` entries.
    """
    best_by_doc = {}
    for score, doc_id in all_list:
        current = best_by_doc.get(doc_id)
        # Keep the first-seen record unless a strictly larger score appears.
        if current is None or score > current[0]:
            best_by_doc[doc_id] = (score, doc_id)

    ranked = list(best_by_doc.values())
    ranked.sort(key=lambda pair: pair[0], reverse=True)
    return ranked[:top_n]
53
+
54
+
55
def list_to_dict(list_data, key_name):
    """Group the items of ``list_data`` by the value of ``item[key_name]``.

    Items that share a key value are collected, in input order, into one
    list, so repeated keys are kept rather than overwritten. Returns a plain
    ``dict`` mapping each key value to its list of items.
    """
    grouped = {}
    for item in list_data:
        grouped.setdefault(item[key_name], []).append(item)
    return grouped
@@ -177,7 +177,14 @@ def deal_num(num_str):
177
177
  if num_str.lower().startswith("n "):
178
178
  num_str = num_str.lower().replace("n ", "").strip()
179
179
 
180
+ num_str = num_str.lower().replace("special_issue_", '').replace("_special_issue", '').replace("issue", "")
181
+ num_str = num_str.replace("spec.", "").replace("iss.", "").replace("spl.", "").replace("special.", "").replace(
182
+ "specialissue.", "")
183
+ num_str = num_str.replace("spec", "").replace("iss", "").replace("spl", "").replace("special", "").replace(
184
+ "specialissue", '')
185
+
180
186
  num_str = num_str.replace("-", "_").replace(".", "_").upper()
187
+ num_str = num_str.lstrip("_").rstrip("_")
181
188
  if num_str.find("_") > -1:
182
189
  start, end = num_str.split("_")
183
190
  start = deal_num_strs(start)
@@ -186,4 +193,4 @@ def deal_num(num_str):
186
193
  else:
187
194
  num_str = deal_num_strs(num_str)
188
195
 
189
- return num_str
196
+ return num_str.lower().strip()
@@ -0,0 +1,158 @@
1
+ import Levenshtein
2
+ import jellyfish
3
+ from rapidfuzz.distance import DamerauLevenshtein, Hamming, Indel, LCSseq, OSA
4
+
5
+
6
class BaseStringSimilarity(object):
    """Collection of string similarity and distance helpers.

    Every method is a thin classmethod wrapper around the ``Levenshtein``,
    ``jellyfish`` or ``rapidfuzz.distance`` libraries. Methods named
    ``*_similarity`` return a float score (larger means more similar);
    methods named ``*_distance`` return an edit count (smaller means more
    similar).
    """

    @classmethod
    def levenshtein_similarity(cls, str1, str2) -> float:
        """Return a Levenshtein-based similarity score for two strings.

        The score is ``1 - distance / max(len(str1), len(str2))``, i.e. the
        edit distance normalized by the longer string.

        Returns:
            The similarity score; ``1.0`` when both strings are empty.
        """
        longest = max(len(str1), len(str2))
        if longest == 0:
            # Two empty strings are identical; avoid dividing by zero.
            return 1.0
        # Raw edit distance between the two strings.
        distance = Levenshtein.distance(str1, str2)
        # Normalize by the longer string.
        return 1 - (distance / longest)

    @classmethod
    def damerau_normalized_distance_similarity(cls, str1, str2) -> float:
        """Return the normalized Damerau-Levenshtein similarity.

        Related rapidfuzz calls that are intentionally not used here:

        * ``DamerauLevenshtein.normalized_distance`` - range [0, 1], smaller
          means more similar; scores are normally "bigger is better", so it
          is not used.
        * ``DamerauLevenshtein.similarity`` - range [0, max_len], larger
          means more similar.
        """
        # Same scoring scheme as cls.levenshtein_similarity; only the
        # underlying edit distance differs.
        return DamerauLevenshtein.normalized_similarity(str1, str2)

    @classmethod
    def indel_levenshtein_similarity(cls, str1, str2) -> float:
        """Return the normalized Indel similarity via ``Levenshtein.ratio``.

        This is essentially ``Indel.normalized_similarity(str1, str2)``.
        Related rapidfuzz calls:

        * ``Indel.distance`` - minimum number of insertions and deletions
          needed to turn ``str1`` into ``str2``.
        * ``Indel.normalized_distance`` - ``distance / (len1 + len2)``,
          range [0, 1], 0 means identical and 1 means completely different.
        * ``Indel.similarity`` - ``(len1 + len2) - distance``, range
          [max, 0].
        * ``Indel.normalized_similarity`` - ``1 - normalized_distance``,
          range [0, 1].
        """
        # Similarity in [0, 1]; 1 means the strings are identical.
        return Levenshtein.ratio(str1, str2)

    @classmethod
    def jaro_similarity(cls, str1, str2) -> float:
        """Return the Jaro similarity of two strings.

        Jaro similarity takes into account the matching characters, their
        order, and character transpositions (swapped positions).

        The result agrees with rapidfuzz's
        ``Jaro.normalized_similarity(str1, str2)``.
        """
        return jellyfish.jaro_similarity(str1, str2)

    @classmethod
    def jaro_winkler_similarity(cls, str1, str2) -> float:
        """Return the Jaro-Winkler similarity of two strings.

        Jaro-Winkler is a refinement of Jaro that gives extra weight to a
        matching prefix.

        The result agrees with rapidfuzz's
        ``JaroWinkler.normalized_similarity(str1, str2)``. For rapidfuzz's
        ``JaroWinkler``, ``distance`` equals ``normalized_distance`` and
        ``similarity`` equals ``normalized_similarity``.
        """
        return jellyfish.jaro_winkler_similarity(str1, str2)

    @classmethod
    def osa_similarity(cls, str1, str2) -> float:
        """Return the normalized Optimal String Alignment (OSA) similarity.

        The value lies in [0, 1] and is computed as
        ``1 - normalized_distance``.
        """
        return OSA.normalized_similarity(str1, str2)

    @classmethod
    def lcs_seq_similarity(cls, str1, str2) -> float:
        """Return the normalized longest-common-subsequence similarity.

        The value lies in [0, 1] and is computed as
        ``1 - normalized_distance``.
        """
        return LCSseq.normalized_similarity(str1, str2)

    @classmethod
    def lcs_seq_distance(cls, str1, str2) -> int:
        """Return the longest-common-subsequence (LCS) distance.

        The LCS is the longest sequence of characters that appears in both
        strings in the same order, but not necessarily contiguously, e.g.
        the LCS of ``"abcde"`` and ``"ace"`` is ``"ace"`` (length 3).

        The distance lies in [0, max] and is computed as
        ``max(len1, len2) - similarity``.
        """
        return LCSseq.distance(str1, str2)

    @classmethod
    def osa_distance(cls, str1, str2) -> int:
        """Return the Optimal String Alignment (OSA) edit distance.

        On top of the Levenshtein operations, OSA counts the swap of two
        adjacent characters (a transposition) as a single edit, but it is
        more restricted than the full Damerau-Levenshtein distance.
        """
        return OSA.distance(str1, str2)

    @classmethod
    def levenshtein_distance(cls, str1, str2) -> int:
        """Return the standard Levenshtein edit distance.

        The standard Levenshtein distance allows insertion, deletion and
        substitution, but not the transposition of adjacent characters.

        ``jellyfish.levenshtein_distance(str1, str2)`` yields the same
        result.
        """
        return Levenshtein.distance(str1, str2)

    @classmethod
    def indel_distance(cls, str1, str2) -> int:
        """Return the Indel (insertion + deletion) distance.

        Only insertions and deletions are counted; substitutions are not
        considered an edit operation.
        """
        return Indel.distance(str1, str2)

    @classmethod
    def damerau_levenshtein_distance(cls, str1, str2) -> int:
        """Return the Damerau-Levenshtein edit distance.

        This is a variation of the Levenshtein distance that counts a
        transposition (e.g. ``ifsh`` -> ``fish``) as a single edit.
        """
        return jellyfish.damerau_levenshtein_distance(str1, str2)

    @classmethod
    def hamming_distance(cls, str1, str2) -> int:
        """Return the Hamming distance as computed by rapidfuzz's ``Hamming.distance``."""
        return Hamming.distance(str1, str2)
154
+
155
+ # str1 = "primulina elegant ladyis a new culitvar developed by crossing seed parent primulina medica and pollen parent primulina longii it has fresh and elegant flowershigh ornamental value and strong shade tolerance it is easy to cultivate and propagate"
156
+ # str2 = "primulinaelegant labyis a new cultivar developed by crossing seed parent primulina medica and pollen parent primulina longii it has fresh and elegant flowershigh ornamental value and strong shade tolerance it is easy to cultivate and propagate 2019 editorial office of acta horticulturae sinica all rights reserved"
157
+ # # str1 = "primulina elegant ladyis a new cultivar developed by crossing seed parent primulina medica and pollen parent primulina longii it has fresh and elegant flowershigh ornamental value and strong shade tolerance it is easy to cultivate and propagate"
158
+ # # str2 = "primulinaelegant ladyis a new cultivar developed by crossing seed parent primulina medica and pollen parent primulina longii it has fresh and elegant flowershigh ornamental value and strong shade tolerance it is easy to cultivate and propagate 2019 editorial office of acta horticulturae sinica all rights reserved"
@@ -2,7 +2,7 @@ from contextlib import asynccontextmanager
2
2
  from typing import AsyncGenerator, Tuple
3
3
 
4
4
  import aiomysql
5
- from aiomysql import Pool, Connection, Cursor
5
+ from aiomysql import Pool, Connection, Cursor, DictCursor
6
6
 
7
7
  DB_CONFIG = {
8
8
  'host': '192.168.98.55',
@@ -99,4 +99,51 @@ def is_all_symbols(text):
99
99
  return False
100
100
 
101
101
  # 检查每个字符是否属于符号类别
102
- return all(unicodedata.category(char).startswith(('P', 'S')) for char in text)
102
+ return all(unicodedata.category(char).startswith(('P', 'S')) for char in text)
103
+
104
+
105
def is_whole_word_en(sub_str: str, long_str: str) -> bool:
    """Tell whether ``sub_str`` occurs in ``long_str`` as a complete word.

    An occurrence counts as a whole word when it is not embedded in a longer
    word: the character right before it (if any) and the character right
    after it (if any) must both be boundary characters. Only lowercase ASCII
    letters and digits count as word characters; every other character,
    including uppercase letters, is treated as a boundary.

    Every occurrence of ``sub_str`` is examined, so a whole-word occurrence
    is found even when an earlier occurrence is part of another word.

    Args:
        sub_str: the word to search for.
        long_str: the text to search in.

    Returns:
        True if at least one occurrence of ``sub_str`` in ``long_str`` is a
        whole word; False otherwise, or when either argument is empty.
    """
    if not sub_str or not long_str:
        return False

    # Any character that is not a lowercase ASCII letter or digit is a boundary.
    boundary_pattern = re.compile(r"[^a-z0-9]")
    sub_len = len(sub_str)
    text_len = len(long_str)

    index = long_str.find(sub_str)
    while index >= 0:
        end = index + sub_len
        # The start/end of the text itself also counts as a boundary.
        is_start = index == 0 or bool(boundary_pattern.match(long_str[index - 1]))
        is_end = end == text_len or bool(boundary_pattern.match(long_str[end]))
        if is_start and is_end:
            return True
        # Move on to the next (possibly overlapping) occurrence.
        index = long_str.find(sub_str, index + 1)

    return False
141
+
142
+
143
def is_whole_word(sub_str: str, long_str: str) -> bool:
    """Tell whether ``sub_str`` should be treated as a whole word in ``long_str``.

    Returns True as soon as ``contains_chinese_chars(sub_str)`` is truthy
    (no word-boundary check is applied in that case); otherwise the result
    of ``is_whole_word_en(sub_str, long_str)`` decides.
    """
    return bool(contains_chinese_chars(sub_str) or is_whole_word_en(sub_str, long_str))
@@ -6,6 +6,11 @@ import regex
6
6
  from re_common.v2.baselibrary.utils.stringutils import qj2bj, bj2qj, get_diacritic_variant, clean_html, \
7
7
  remove_spaces_between_chinese_characters
8
8
 
9
+ from opencc import OpenCC
10
+
11
+ # pip install opencc-python-reimplemented
12
+ cc = OpenCC("t2s") # t2s是繁体转简体
13
+
9
14
 
10
15
  class StringClear(object):
11
16
 
@@ -31,6 +36,11 @@ class StringClear(object):
31
36
  self.obj_str = bj2qj(self.obj_str)
32
37
  return self
33
38
 
39
def convert_to_simplified(self):
    """Convert Traditional Chinese in ``obj_str`` to Simplified Chinese.

    Uses the module-level OpenCC converter ``cc`` (configured as ``"t2s"``)
    and returns ``self`` so calls can be chained.
    """
    simplified_text = cc.convert(self.obj_str)
    self.obj_str = simplified_text
    return self
43
+
34
44
  def lower(self):
35
45
  self.obj_str = self.obj_str.lower()
36
46
  return self
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: re_common
3
- Version: 10.0.15
3
+ Version: 10.0.17
4
4
  Summary: a library about all python projects
5
5
  Home-page: https://gitee.com/xujiangios/re-common
6
6
  Author: vic
@@ -188,6 +188,7 @@ re_common/v2/baselibrary/tools/unionfind_tools.py
188
188
  re_common/v2/baselibrary/utils/BusinessStringUtil.py
189
189
  re_common/v2/baselibrary/utils/__init__.py
190
190
  re_common/v2/baselibrary/utils/author_smi.py
191
+ re_common/v2/baselibrary/utils/base_string_similarity.py
191
192
  re_common/v2/baselibrary/utils/basedict.py
192
193
  re_common/v2/baselibrary/utils/basehdfs.py
193
194
  re_common/v2/baselibrary/utils/basepika.py
@@ -34,7 +34,7 @@ long_description = """
34
34
  """
35
35
  setuptools.setup(
36
36
  name="re_common",
37
- version="10.0.15",
37
+ version="10.0.17",
38
38
  author="vic",
39
39
  author_email="xujiang5@163.com",
40
40
  description="a library about all python projects",
@@ -1,9 +0,0 @@
1
-
2
def check_no_duplicates_2d(lst_2d):
    """Return True when no row of the 2-D list contains a repeated element."""
    # A row is duplicate-free when turning it into a set keeps its length.
    return all(len(set(row)) == len(row) for row in lst_2d)
9
-
File without changes
File without changes