re-common 10.0.40__tar.gz → 10.0.41__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (257)
  1. {re_common-10.0.40/re_common.egg-info → re_common-10.0.41}/PKG-INFO +2 -10
  2. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/business_utils/BusinessStringUtil.py +17 -2
  3. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/tools/data_processer/data_processer.py +52 -63
  4. re_common-10.0.41/re_common/v2/baselibrary/tools/tree_processor/builder.py +25 -0
  5. re_common-10.0.41/re_common/v2/baselibrary/tools/tree_processor/node.py +13 -0
  6. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/utils/basetime.py +17 -0
  7. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/utils/db.py +19 -1
  8. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/utils/string_bool.py +2 -1
  9. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/utils/stringutils.py +41 -0
  10. re_common-10.0.41/re_common/vip/proxy/__init__.py +0 -0
  11. {re_common-10.0.40 → re_common-10.0.41/re_common.egg-info}/PKG-INFO +2 -10
  12. {re_common-10.0.40 → re_common-10.0.41}/re_common.egg-info/SOURCES.txt +3 -0
  13. {re_common-10.0.40 → re_common-10.0.41}/setup.py +1 -1
  14. {re_common-10.0.40 → re_common-10.0.41}/LICENSE +0 -0
  15. {re_common-10.0.40 → re_common-10.0.41}/README.md +0 -0
  16. {re_common-10.0.40 → re_common-10.0.41}/pyproject.toml +0 -0
  17. {re_common-10.0.40 → re_common-10.0.41}/re_common/__init__.py +0 -0
  18. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/__init__.py +0 -0
  19. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/baseabs/__init__.py +0 -0
  20. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/baseabs/baseabs.py +0 -0
  21. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/database/__init__.py +0 -0
  22. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/database/mbuilder.py +0 -0
  23. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/database/moudle.py +0 -0
  24. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/database/msqlite3.py +0 -0
  25. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/database/mysql.py +0 -0
  26. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/database/sql_factory.py +0 -0
  27. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/mthread/MThreadingRun.py +0 -0
  28. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/mthread/MThreadingRunEvent.py +0 -0
  29. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/mthread/__init__.py +0 -0
  30. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/mthread/mythreading.py +0 -0
  31. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/pakge_other/__init__.py +0 -0
  32. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/pakge_other/socks.py +0 -0
  33. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/readconfig/__init__.py +0 -0
  34. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/readconfig/config_factory.py +0 -0
  35. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/readconfig/ini_config.py +0 -0
  36. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/readconfig/toml_config.py +0 -0
  37. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/temporary/__init__.py +0 -0
  38. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/temporary/envdata.py +0 -0
  39. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/__init__.py +0 -0
  40. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/all_requests/__init__.py +0 -0
  41. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/all_requests/aiohttp_request.py +0 -0
  42. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/all_requests/httpx_requet.py +0 -0
  43. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/all_requests/mrequest.py +0 -0
  44. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/all_requests/requests_request.py +0 -0
  45. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/batch_compre/__init__.py +0 -0
  46. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/batch_compre/bijiao_batch.py +0 -0
  47. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/contrast_db3.py +0 -0
  48. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/copy_file.py +0 -0
  49. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/db3_2_sizedb3.py +0 -0
  50. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/foreachgz.py +0 -0
  51. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/get_attr.py +0 -0
  52. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/image_to_pdf.py +0 -0
  53. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/java_code_deal.py +0 -0
  54. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/javacode.py +0 -0
  55. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/mdb_db3.py +0 -0
  56. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/merge_file.py +0 -0
  57. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/merge_gz_file.py +0 -0
  58. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/mhdfstools/__init__.py +0 -0
  59. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +0 -0
  60. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/mhdfstools/hdfst.py +0 -0
  61. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +0 -0
  62. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/mongo_tools.py +0 -0
  63. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/move_file.py +0 -0
  64. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/move_mongo/__init__.py +0 -0
  65. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +0 -0
  66. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/move_mongo/move_mongo_table.py +0 -0
  67. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/move_mongo/use_mttf.py +0 -0
  68. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/move_mongo/use_mv.py +0 -0
  69. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/mpandas/__init__.py +0 -0
  70. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +0 -0
  71. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/mpandas/pandas_visualization.py +0 -0
  72. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/myparsel.py +0 -0
  73. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/rename_dir_file.py +0 -0
  74. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/sequoiadb_utils.py +0 -0
  75. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/split_line_to_many.py +0 -0
  76. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/stringtodicts.py +0 -0
  77. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/tools/workwechant_bot.py +0 -0
  78. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/__init__.py +0 -0
  79. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/baseaiohttp.py +0 -0
  80. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/baseaiomysql.py +0 -0
  81. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/baseallstep.py +0 -0
  82. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/baseavro.py +0 -0
  83. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/baseboto3.py +0 -0
  84. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/basecsv.py +0 -0
  85. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/basedict.py +0 -0
  86. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/basedir.py +0 -0
  87. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/baseencode.py +0 -0
  88. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/baseencoding.py +0 -0
  89. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/baseesdsl.py +0 -0
  90. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/baseexcel.py +0 -0
  91. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/baseexcept.py +0 -0
  92. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/basefile.py +0 -0
  93. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/baseftp.py +0 -0
  94. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/basegzip.py +0 -0
  95. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/basehdfs.py +0 -0
  96. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/basehttpx.py +0 -0
  97. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/baseip.py +0 -0
  98. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/basejson.py +0 -0
  99. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/baselist.py +0 -0
  100. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/basemotor.py +0 -0
  101. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/basemssql.py +0 -0
  102. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/baseodbc.py +0 -0
  103. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/basepandas.py +0 -0
  104. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/basepeewee.py +0 -0
  105. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/basepika.py +0 -0
  106. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/basepydash.py +0 -0
  107. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/basepymongo.py +0 -0
  108. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/basequeue.py +0 -0
  109. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/baserar.py +0 -0
  110. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/baserequest.py +0 -0
  111. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/baseset.py +0 -0
  112. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/basesmb.py +0 -0
  113. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/basestring.py +0 -0
  114. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/basetime.py +0 -0
  115. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/basetuple.py +0 -0
  116. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/baseurl.py +0 -0
  117. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/basezip.py +0 -0
  118. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/core/__init__.py +0 -0
  119. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/core/bottomutils.py +0 -0
  120. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/core/mdeprecated.py +0 -0
  121. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/core/mlamada.py +0 -0
  122. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/core/msginfo.py +0 -0
  123. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/core/requests_core.py +0 -0
  124. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/fateadm.py +0 -0
  125. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/importfun.py +0 -0
  126. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/mfaker.py +0 -0
  127. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/my_abc/__init__.py +0 -0
  128. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/my_abc/better_abc.py +0 -0
  129. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/mylogger.py +0 -0
  130. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/myredisclient.py +0 -0
  131. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/pipupgrade.py +0 -0
  132. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/ringlist.py +0 -0
  133. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/version_compare.py +0 -0
  134. {re_common-10.0.40 → re_common-10.0.41}/re_common/baselibrary/utils/ydmhttp.py +0 -0
  135. {re_common-10.0.40 → re_common-10.0.41}/re_common/facade/__init__.py +0 -0
  136. {re_common-10.0.40 → re_common-10.0.41}/re_common/facade/lazy_import.py +0 -0
  137. {re_common-10.0.40 → re_common-10.0.41}/re_common/facade/loggerfacade.py +0 -0
  138. {re_common-10.0.40 → re_common-10.0.41}/re_common/facade/mysqlfacade.py +0 -0
  139. {re_common-10.0.40 → re_common-10.0.41}/re_common/facade/now.py +0 -0
  140. {re_common-10.0.40 → re_common-10.0.41}/re_common/facade/sqlite3facade.py +0 -0
  141. {re_common-10.0.40 → re_common-10.0.41}/re_common/facade/use/__init__.py +0 -0
  142. {re_common-10.0.40 → re_common-10.0.41}/re_common/facade/use/mq_use_facade.py +0 -0
  143. {re_common-10.0.40 → re_common-10.0.41}/re_common/facade/use/proxy_use_facade.py +0 -0
  144. {re_common-10.0.40 → re_common-10.0.41}/re_common/libtest/__init__.py +0 -0
  145. {re_common-10.0.40 → re_common-10.0.41}/re_common/libtest/base_dict_test.py +0 -0
  146. {re_common-10.0.40 → re_common-10.0.41}/re_common/libtest/baseavro_test.py +0 -0
  147. {re_common-10.0.40 → re_common-10.0.41}/re_common/libtest/basefile_test.py +0 -0
  148. {re_common-10.0.40 → re_common-10.0.41}/re_common/libtest/basemssql_test.py +0 -0
  149. {re_common-10.0.40 → re_common-10.0.41}/re_common/libtest/baseodbc_test.py +0 -0
  150. {re_common-10.0.40 → re_common-10.0.41}/re_common/libtest/basepandas_test.py +0 -0
  151. {re_common-10.0.40 → re_common-10.0.41}/re_common/libtest/get_attr_test/__init__.py +0 -0
  152. {re_common-10.0.40 → re_common-10.0.41}/re_common/libtest/get_attr_test/get_attr_test_settings.py +0 -0
  153. {re_common-10.0.40 → re_common-10.0.41}/re_common/libtest/get_attr_test/settings.py +0 -0
  154. {re_common-10.0.40 → re_common-10.0.41}/re_common/libtest/idencode_test.py +0 -0
  155. {re_common-10.0.40 → re_common-10.0.41}/re_common/libtest/iniconfig_test.py +0 -0
  156. {re_common-10.0.40 → re_common-10.0.41}/re_common/libtest/ip_test.py +0 -0
  157. {re_common-10.0.40 → re_common-10.0.41}/re_common/libtest/merge_file_test.py +0 -0
  158. {re_common-10.0.40 → re_common-10.0.41}/re_common/libtest/mfaker_test.py +0 -0
  159. {re_common-10.0.40 → re_common-10.0.41}/re_common/libtest/mm3_test.py +0 -0
  160. {re_common-10.0.40 → re_common-10.0.41}/re_common/libtest/mylogger_test.py +0 -0
  161. {re_common-10.0.40 → re_common-10.0.41}/re_common/libtest/myparsel_test.py +0 -0
  162. {re_common-10.0.40 → re_common-10.0.41}/re_common/libtest/mysql_test.py +0 -0
  163. {re_common-10.0.40 → re_common-10.0.41}/re_common/libtest/pymongo_test.py +0 -0
  164. {re_common-10.0.40 → re_common-10.0.41}/re_common/libtest/split_test.py +0 -0
  165. {re_common-10.0.40 → re_common-10.0.41}/re_common/libtest/sqlite3_merge_test.py +0 -0
  166. {re_common-10.0.40 → re_common-10.0.41}/re_common/libtest/sqlite3_test.py +0 -0
  167. {re_common-10.0.40 → re_common-10.0.41}/re_common/libtest/tomlconfig_test.py +0 -0
  168. {re_common-10.0.40 → re_common-10.0.41}/re_common/libtest/use_tools_test/__init__.py +0 -0
  169. {re_common-10.0.40 → re_common-10.0.41}/re_common/libtest/user/__init__.py +0 -0
  170. {re_common-10.0.40 → re_common-10.0.41}/re_common/studio/__init__.py +0 -0
  171. {re_common-10.0.40 → re_common-10.0.41}/re_common/studio/assignment_expressions.py +0 -0
  172. {re_common-10.0.40 → re_common-10.0.41}/re_common/studio/mydash/__init__.py +0 -0
  173. {re_common-10.0.40 → re_common-10.0.41}/re_common/studio/mydash/test1.py +0 -0
  174. {re_common-10.0.40 → re_common-10.0.41}/re_common/studio/pydashstudio/__init__.py +0 -0
  175. {re_common-10.0.40 → re_common-10.0.41}/re_common/studio/pydashstudio/first.py +0 -0
  176. {re_common-10.0.40 → re_common-10.0.41}/re_common/studio/streamlitstudio/__init__.py +0 -0
  177. {re_common-10.0.40 → re_common-10.0.41}/re_common/studio/streamlitstudio/first_app.py +0 -0
  178. {re_common-10.0.40 → re_common-10.0.41}/re_common/studio/streamlitstudio/uber_pickups.py +0 -0
  179. {re_common-10.0.40 → re_common-10.0.41}/re_common/studio/test.py +0 -0
  180. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/__init__.py +0 -0
  181. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/__init__.py +0 -0
  182. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/business_utils/__init__.py +0 -0
  183. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/business_utils/baseencodeid.py +0 -0
  184. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/business_utils/full_doi_path.py +0 -0
  185. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/business_utils/rel_tools.py +0 -0
  186. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/decorators/__init__.py +0 -0
  187. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/decorators/utils.py +0 -0
  188. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/helpers/__init__.py +0 -0
  189. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/helpers/search_packge/NearestNeighbors_test.py +0 -0
  190. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/helpers/search_packge/__init__.py +0 -0
  191. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/helpers/search_packge/fit_text_match.py +0 -0
  192. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/helpers/search_packge/scikit_learn_text_matcher.py +0 -0
  193. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/helpers/search_packge/test.py +0 -0
  194. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/s3object/__init__.py +0 -0
  195. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/s3object/baseboto3.py +0 -0
  196. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/tools/WeChatRobot.py +0 -0
  197. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/tools/__init__.py +0 -0
  198. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/tools/ac_ahocorasick.py +0 -0
  199. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/tools/concurrency.py +0 -0
  200. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/tools/data_processer/__init__.py +0 -0
  201. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/tools/data_processer/base.py +0 -0
  202. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/tools/data_processer/data_reader.py +0 -0
  203. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/tools/data_processer/data_writer.py +0 -0
  204. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/tools/dict_tools.py +0 -0
  205. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/tools/dolphinscheduler.py +0 -0
  206. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/tools/hdfs_base_processor.py +0 -0
  207. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/tools/hdfs_bulk_processor.py +0 -0
  208. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/tools/hdfs_data_processer.py +0 -0
  209. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/tools/hdfs_line_processor.py +0 -0
  210. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/tools/list_tools.py +0 -0
  211. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/tools/resume_tracker.py +0 -0
  212. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/tools/search_hash_tools.py +0 -0
  213. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/tools/text_matcher.py +0 -0
  214. {re_common-10.0.40/re_common/v2/baselibrary/utils → re_common-10.0.41/re_common/v2/baselibrary/tools/tree_processor}/__init__.py +0 -0
  215. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/tools/unionfind_tools.py +0 -0
  216. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/utils/BusinessStringUtil.py +0 -0
  217. {re_common-10.0.40/re_common/vip → re_common-10.0.41/re_common/v2/baselibrary/utils}/__init__.py +0 -0
  218. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/utils/api_net_utils.py +0 -0
  219. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/utils/author_smi.py +0 -0
  220. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/utils/base_string_similarity.py +0 -0
  221. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/utils/basedict.py +0 -0
  222. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/utils/basehdfs.py +0 -0
  223. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/utils/basepika.py +0 -0
  224. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/utils/elasticsearch.py +0 -0
  225. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/utils/json_cls.py +0 -0
  226. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/utils/mq.py +0 -0
  227. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/utils/n_ary_expression_tree.py +0 -0
  228. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/utils/string_clear.py +0 -0
  229. {re_common-10.0.40 → re_common-10.0.41}/re_common/v2/baselibrary/utils/string_smi.py +0 -0
  230. {re_common-10.0.40/re_common/vip/proxy → re_common-10.0.41/re_common/vip}/__init__.py +0 -0
  231. {re_common-10.0.40 → re_common-10.0.41}/re_common/vip/base_step_process.py +0 -0
  232. {re_common-10.0.40 → re_common-10.0.41}/re_common/vip/baseencodeid.py +0 -0
  233. {re_common-10.0.40 → re_common-10.0.41}/re_common/vip/changetaskname.py +0 -0
  234. {re_common-10.0.40 → re_common-10.0.41}/re_common/vip/core_var.py +0 -0
  235. {re_common-10.0.40 → re_common-10.0.41}/re_common/vip/mmh3Hash.py +0 -0
  236. {re_common-10.0.40 → re_common-10.0.41}/re_common/vip/proxy/allproxys.py +0 -0
  237. {re_common-10.0.40 → re_common-10.0.41}/re_common/vip/proxy/allproxys_thread.py +0 -0
  238. {re_common-10.0.40 → re_common-10.0.41}/re_common/vip/proxy/cnki_proxy.py +0 -0
  239. {re_common-10.0.40 → re_common-10.0.41}/re_common/vip/proxy/kuaidaili.py +0 -0
  240. {re_common-10.0.40 → re_common-10.0.41}/re_common/vip/proxy/proxy_all.py +0 -0
  241. {re_common-10.0.40 → re_common-10.0.41}/re_common/vip/proxy/update_kuaidaili_0.py +0 -0
  242. {re_common-10.0.40 → re_common-10.0.41}/re_common/vip/proxy/wanfang_proxy.py +0 -0
  243. {re_common-10.0.40 → re_common-10.0.41}/re_common/vip/proxy/wp_proxy_all.py +0 -0
  244. {re_common-10.0.40 → re_common-10.0.41}/re_common/vip/read_rawid_to_txt.py +0 -0
  245. {re_common-10.0.40 → re_common-10.0.41}/re_common/vip/title/__init__.py +0 -0
  246. {re_common-10.0.40 → re_common-10.0.41}/re_common/vip/title/transform/TransformBookTitleToZt.py +0 -0
  247. {re_common-10.0.40 → re_common-10.0.41}/re_common/vip/title/transform/TransformConferenceTitleToZt.py +0 -0
  248. {re_common-10.0.40 → re_common-10.0.41}/re_common/vip/title/transform/TransformCstadTitleToZt.py +0 -0
  249. {re_common-10.0.40 → re_common-10.0.41}/re_common/vip/title/transform/TransformJournalTitleToZt.py +0 -0
  250. {re_common-10.0.40 → re_common-10.0.41}/re_common/vip/title/transform/TransformPatentTitleToZt.py +0 -0
  251. {re_common-10.0.40 → re_common-10.0.41}/re_common/vip/title/transform/TransformRegulationTitleToZt.py +0 -0
  252. {re_common-10.0.40 → re_common-10.0.41}/re_common/vip/title/transform/TransformStandardTitleToZt.py +0 -0
  253. {re_common-10.0.40 → re_common-10.0.41}/re_common/vip/title/transform/TransformThesisTitleToZt.py +0 -0
  254. {re_common-10.0.40 → re_common-10.0.41}/re_common/vip/title/transform/__init__.py +0 -0
  255. {re_common-10.0.40 → re_common-10.0.41}/re_common.egg-info/dependency_links.txt +0 -0
  256. {re_common-10.0.40 → re_common-10.0.41}/re_common.egg-info/top_level.txt +0 -0
  257. {re_common-10.0.40 → re_common-10.0.41}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.1
2
2
  Name: re_common
3
- Version: 10.0.40
3
+ Version: 10.0.41
4
4
  Summary: a library about all python projects
5
5
  Home-page: https://gitee.com/xujiangios/re-common
6
6
  Author: vic
@@ -11,14 +11,6 @@ Classifier: Operating System :: OS Independent
11
11
  Requires-Python: >=3.6
12
12
  Description-Content-Type: text/markdown
13
13
  License-File: LICENSE
14
- Dynamic: author
15
- Dynamic: author-email
16
- Dynamic: classifier
17
- Dynamic: description
18
- Dynamic: description-content-type
19
- Dynamic: home-page
20
- Dynamic: requires-python
21
- Dynamic: summary
22
14
 
23
15
 
24
16
  这是一个基础类,依赖很多的第三方包,是一个用得到的第三方库的封装,可以在此基础上迅速构建项目
@@ -1,6 +1,9 @@
1
1
  # 某些业务中的字符串处理 算是特定场景的工具 不算通用工具
2
+ import itertools
2
3
  import re
3
4
 
5
+ from rapidfuzz.fuzz import partial_token_set_ratio
6
+
4
7
  from re_common.v2.baselibrary.utils.author_smi import AuthorRatio
5
8
  from re_common.v2.baselibrary.utils.string_bool import is_all_symbols
6
9
  from re_common.v2.baselibrary.utils.string_clear import rel_clear
@@ -199,7 +202,7 @@ def deal_num(num_str):
199
202
  return num_str.lower().strip()
200
203
 
201
204
 
202
- def clear_author_1st(author_str:str):
205
+ def clear_author_1st(author_str: str):
203
206
  # 清理括号 防止前面流程没有清理干净
204
207
  author_str = re.sub("\\[.*?]", "", author_str)
205
208
  author_str = re.sub("\\(.*?\\)", "", author_str)
@@ -209,6 +212,7 @@ def clear_author_1st(author_str:str):
209
212
 
210
213
  return author_str
211
214
 
215
+
212
216
  def is_same_author(a1, a2):
213
217
  if get_alphabetic_ratio(a1.strip()) > 0.7 and get_alphabetic_ratio(a2.strip()) > 0.7:
214
218
  author_similar_ = AuthorRatio(a1.strip(), a2.strip())
@@ -217,4 +221,15 @@ def is_same_author(a1, a2):
217
221
  else:
218
222
  if rel_clear(a1.strip()) == rel_clear(a2.strip()):
219
223
  return True
220
- return False
224
+ return False
225
+
226
+
227
def abs_smi(abs_l1, abs_l2):
    """Return the highest pairwise similarity between two collections of strings.

    Every entry of both inputs is normalised with ``rel_clear`` and entries
    that clean down to an empty value are dropped. Each remaining pair
    (one string from each side) is scored with rapidfuzz's
    ``partial_token_set_ratio`` and the best score is returned.

    Args:
        abs_l1: iterable of strings (presumably abstracts, judging by the
            name -- confirm with callers).
        abs_l2: iterable of strings to compare against ``abs_l1``.

    Returns:
        The maximum score over all pairs, or ``0.0`` when either side has
        nothing left to compare after cleaning. The previous implementation
        called ``max()`` on an empty list in that case and raised
        ``ValueError``.
    """
    cleaned_l1 = [cleared for cleared in map(rel_clear, abs_l1) if cleared]
    cleaned_l2 = [cleared for cleared in map(rel_clear, abs_l2) if cleared]

    # Stream the cartesian product straight into max(); no intermediate list
    # of pairs or scores is needed.
    return max(
        (
            partial_token_set_ratio(abs1, abs2, processor=rel_clear)
            for abs1, abs2 in itertools.product(cleaned_l1, cleaned_l2)
        ),
        default=0.0,
    )
@@ -43,6 +43,15 @@ class DatabaseHandler:
43
43
  )
44
44
  conn.commit()
45
45
 
46
def get_processed_files_count(self):
    """Return how many rows the ``processed_files`` table of the db3 file holds.

    The query runs while holding ``FileLock(self.lock_file)``.

    Returns:
        The integer result of ``SELECT COUNT(*) FROM processed_files``.
    """
    with FileLock(self.lock_file):
        conn = sqlite3.connect(self.db_file)
        try:
            # Using the sqlite3 connection as a context manager only
            # commits or rolls back; it never closes the connection, so
            # close it explicitly to avoid leaking a handle per call.
            row = conn.execute("SELECT COUNT(*) FROM processed_files").fetchone()
            return row[0]
        finally:
            conn.close()
54
+
46
55
  def save_processed_files_many(self, file_paths):
47
56
  """批量保存处理过的文件路径"""
48
57
  if not file_paths:
@@ -110,6 +119,21 @@ class DatabaseHandler:
110
119
  print(f"伪造处理记录时出错: {str(e)}")
111
120
 
112
121
 
122
def _retry_target(retry_state):
    """Pick the file path out of the call that is being retried.

    The callbacks below are attached to bound methods, so the first
    positional argument is the owning instance rather than the path.
    Preference order: the ``hdfs_file_path`` keyword, then the first
    positional string, then the first positional argument (the old
    behaviour), then ``None`` when the call carried no arguments.
    """
    kwargs = getattr(retry_state, "kwargs", None) or {}
    if "hdfs_file_path" in kwargs:
        return kwargs["hdfs_file_path"]
    args = getattr(retry_state, "args", None) or ()
    for arg in args:
        if isinstance(arg, str):
            return arg
    return args[0] if args else None


def on_retry(retry_state):
    """Print the traceback and a retry notice after a failed attempt.

    Args:
        retry_state: the retry call state; this function reads
            ``outcome.exception()``, ``attempt_number`` and the call's
            ``args`` / ``kwargs``.
    """
    exc = retry_state.outcome.exception()
    tb = ''.join(traceback.format_exception(type(exc), exc, exc.__traceback__))
    print(tb)
    print(
        f"处理文件 {_retry_target(retry_state)} 时发生错误: {exc},正在重试 {retry_state.attempt_number}")


def on_retry_error(retry_state):
    """Report that the retry limit was reached and return ``False``.

    Args:
        retry_state: the retry call state; only the call's ``args`` /
            ``kwargs`` are read, to name the file in the message.

    Returns:
        Always ``False``.
    """
    print(f"处理文件 {_retry_target(retry_state)} 失败,达到重试上限")
    return False
135
+
136
+
113
137
  class DataProcessor:
114
138
  def __init__(
115
139
  self,
@@ -249,38 +273,6 @@ class DataProcessor:
249
273
  for file_path in all_files:
250
274
  yield file_path
251
275
 
252
- @retry(stop=stop_after_attempt(3),
253
- wait=wait_random(min=10, max=30),
254
- retry=retry_if_result(lambda result: not result), # 如果返回值是 False(失败),则重试 最后会抛出一个默认错误tenacity.RetryError:
255
- reraise=True)
256
- async def _batch_process_file(self, hdfs_file_path: str, process_func: Callable[[str], Any],
257
- write_dir: str = None):
258
- """批量更新所有 gz 文件"""
259
- # all_succeed = True
260
- # for hdfs_file_path in self.get_file_list(hdfs_dir):
261
- # if self.db_handler.is_file_processed(hdfs_file_path):
262
- # print(f"跳过已处理文件: {hdfs_file_path}")
263
- # continue # 如果文件已处理,跳过
264
- # succeed = await self.retry_process_file(hdfs_file_path, process_func, write_dir) # 处理文件
265
- # if succeed is False:
266
- # all_succeed = False
267
- #
268
- # if all_succeed:
269
- # # 处理完成后删除数据库文件
270
- # try:
271
- # if os.path.exists(self.db_file):
272
- # os.remove(self.db_file)
273
- # print(f"已删除断点重试文件: {self.db_file}")
274
- # return True
275
- # except Exception as e:
276
- # print(f"删除断点重试文件失败: {e}")
277
- # return False
278
- if self.db_handler.is_file_processed(hdfs_file_path):
279
- print(f"跳过已处理文件: {hdfs_file_path}")
280
- return True # 如果文件已处理,跳过
281
- succeed = await self.retry_process_file(hdfs_file_path, process_func, write_dir) # 处理文件
282
- return succeed
283
-
284
276
  async def process_file_bulk(self, hdfs_file_path, process_func, write_dir):
285
277
  """按批次处理单个文件,批量数据传递给处理函数"""
286
278
  # 获取文件的数据总量
@@ -391,36 +383,6 @@ class DataProcessor:
391
383
  succeed = await self._batch_process_file(hdfs_file_path, process_func, write_dir)
392
384
  if succeed is False:
393
385
  all_succeed = False
394
- if all_succeed:
395
- # 处理完成后删除数据库文件
396
- try:
397
- if os.path.exists(self.db_file):
398
- os.remove(self.db_file)
399
- print(f"已删除断点重试文件: {self.db_file}")
400
- return True
401
- except Exception as e:
402
- print(f"删除断点重试文件失败: {e}")
403
- return False
404
-
405
- @retry(stop=stop_after_attempt(3),
406
- wait=wait_random(min=10, max=30),
407
- retry=retry_if_result(lambda result: not result), # 如果返回值是 False(失败),则重试 最后会抛出一个默认错误tenacity.RetryError:
408
- reraise=True)
409
- async def _batch_process_file_bulk(self, hdfs_file_path: str, process_func: Callable[[List[str]], Any],
410
- write_dir: str = None):
411
- """批量处理 gz 文件中的数据"""
412
- # 获取所有文件
413
- # all_succeed = True
414
- # for hdfs_file_path in self.get_file_list(hdfs_dir):
415
- # # 查看是否跳过文件
416
- # if self.db_handler.is_file_processed(hdfs_file_path):
417
- # print(f"跳过已处理文件: {hdfs_file_path}")
418
- # continue # 跳过已处理文件
419
- # # 开始批量处理文件
420
- # succeed = await self.retry_process_file_bulk(hdfs_file_path, process_func, write_dir)
421
- # if succeed is False:
422
- # all_succeed = False
423
- #
424
386
  # if all_succeed:
425
387
  # # 处理完成后删除数据库文件
426
388
  # try:
@@ -430,7 +392,17 @@ class DataProcessor:
430
392
  # return True
431
393
  # except Exception as e:
432
394
  # print(f"删除断点重试文件失败: {e}")
433
- # return False
395
+ return all_succeed
396
+
397
+ @retry(stop=stop_after_attempt(3),
398
+ wait=wait_random(min=10, max=30),
399
+ # retry=retry_if_result(lambda result: not result), # 如果返回值是 False(失败),则重试 最后会抛出一个默认错误tenacity.RetryError:
400
+ before_sleep=on_retry, # 每次抛错后使用
401
+ retry_error_callback=on_retry_error, # 如果最后没成功 返回 False
402
+ reraise=True) # 如果函数一直失败,重试结束时会 重新抛出最后一次调用时的原始异常。
403
+ async def _batch_process_file_bulk(self, hdfs_file_path: str, process_func: Callable[[List[str]], Any],
404
+ write_dir: str = None):
405
+ """批量处理 gz 文件中的数据"""
434
406
  # 查看是否跳过文件
435
407
  if self.db_handler.is_file_processed(hdfs_file_path):
436
408
  print(f"跳过已处理文件: {hdfs_file_path}")
@@ -439,6 +411,21 @@ class DataProcessor:
439
411
  succeed = await self.retry_process_file_bulk(hdfs_file_path, process_func, write_dir)
440
412
  return succeed
441
413
 
414
@retry(
    stop=stop_after_attempt(3),
    wait=wait_random(min=10, max=30),
    # Result-based retrying (retry_if_result on a falsy return value) is
    # deliberately left disabled here, as in the original; with it enabled,
    # exhausting the attempts would surface as tenacity.RetryError.
    before_sleep=on_retry,  # invoked after a failed attempt, before sleeping
    # Called once the attempts are exhausted; per the original author's note
    # it is expected to make the call return False.
    retry_error_callback=on_retry_error,
    # NOTE(review): tenacity appears to give retry_error_callback precedence
    # over reraise, so this flag may never take effect - confirm.
    reraise=True,
)
async def _batch_process_file(self, hdfs_file_path: str, process_func: Callable[[str], Any],
                              write_dir: str = None):
    """Process a single gz file unless it has already been recorded as done.

    Args:
        hdfs_file_path: Path of the file to process.
        process_func: Callback forwarded to ``retry_process_file``.
        write_dir: Optional output directory forwarded to
            ``retry_process_file``.

    Returns:
        ``True`` when the file was already processed and is skipped; otherwise
        whatever ``retry_process_file`` returns for this file.
    """
    already_done = self.db_handler.is_file_processed(hdfs_file_path)
    if not already_done:
        # Not seen before: hand the file over to the per-file processing routine.
        return await self.retry_process_file(hdfs_file_path, process_func, write_dir)

    print(f"跳过已处理文件: {hdfs_file_path}")
    return True
428
+
442
429
 
443
430
  # 全局变量,每个进程独立持有
444
431
  _processor: DataProcessor | None = None
@@ -506,3 +493,5 @@ def run_worker_many(hdfs_dir: str, process_func: Callable[[List[str]], Any] | Ca
506
493
  for result in results:
507
494
  if result:
508
495
  print(result)
496
+ db3_count = processor.db_handler.get_processed_files_count()
497
+ print(f"db3文件数据量{db3_count},文件实际数据量{len(all_file)},是否完成全部转移: {db3_count == len(all_file)}")
@@ -0,0 +1,25 @@
1
+ from re_common.v2.baselibrary.tools.tree_processor.node import TreeNode
2
+
3
+
4
def build_forest(node_list):
    """Build a forest of ``TreeNode`` objects from flat parent/child records.

    Args:
        node_list: Iterable of ``(cid, pid, count)`` triples. ``cid`` is the
            node id, ``pid`` is the id of its parent (``None`` when it has no
            parent) and ``count`` is the value stored on the node. Any
            iterable is accepted, including one-shot generators.

    Returns:
        A list with the root of every tree, i.e. every node that ended up
        without a parent: either its ``pid`` is ``None`` or its ``pid`` does
        not match any ``cid`` in the input. Roots keep the order in which
        their ``cid`` first appeared.

    Note:
        Records that form a cycle (including ``cid == pid``) give every node
        in the cycle a parent, so none of them is reported as a root.
    """
    # The input is walked twice, so materialise it first; otherwise a generator
    # would be exhausted by the first pass and no links would ever be created.
    records = list(node_list)

    # Pass 1: create one node per cid (a repeated cid keeps the last record).
    nodes = {cid: TreeNode(cid, count) for cid, _pid, count in records}

    # Pass 2: attach each node to its parent when that parent exists.
    for cid, pid, _count in records:
        if pid is not None and pid in nodes:
            nodes[pid].add_child(nodes[cid])

    # Pass 3: whatever is still parentless is the root of a tree.
    return [node for node in nodes.values() if node.parent is None]
@@ -0,0 +1,13 @@
1
class TreeNode:
    """A node of an n-ary tree that carries an id and a count."""

    def __init__(self, cid, count):
        """Create a detached node.

        Args:
            cid: Identifier of the node.
            count: Value associated with the node.
        """
        self.id = cid
        self.count = count
        self.children = []  # direct children, in insertion order
        self.parent = None  # parent node, or None while the node is a root

    def __repr__(self):
        # Only the number of children is shown: printing the children or the
        # parent themselves would recurse through the whole tree.
        return (
            f"{type(self).__name__}(id={self.id!r}, count={self.count!r}, "
            f"children={len(self.children)})"
        )

    def add_child(self, child):
        """Append *child* to this node's children and set its parent to this node."""
        self.children.append(child)
        child.parent = self

    def is_leaf(self):
        """Return True when the node has no children."""
        return len(self.children) == 0
@@ -75,3 +75,20 @@ class BaseTime(object):
75
75
  current_time = BaseTime.get_current_beijing_time()
76
76
  last_time = BaseTime.parse_beijing_time(last_time_str)
77
77
  return current_time.hour != last_time.hour
78
+
79
+ @staticmethod
80
+ def is_weekday(num_weekday: int) -> bool:
81
+ """
82
+ 判断当前日期是否为指定星期。
83
+
84
+ 参数:
85
+ num_weekday (int): 表示星期的数字(1=星期一, 2=星期二, ..., 7=星期日)。
86
+
87
+ 返回:
88
+ bool: 如果当前日期不是指定的星期,则返回 True;否则返回 False。
89
+
90
+ 示例:
91
+ 如果 num_weekday=6(星期六),而今天是星期五(weekday()=4),则返回 True。
92
+ """
93
+ current_weekday = datetime.now().weekday() # 获取当前星期(0=星期一, 1=星期二, ..., 6=星期日)
94
+ return current_weekday != num_weekday - 1
@@ -94,7 +94,6 @@ aiomysql_pool = None
94
94
  pool_lock = asyncio.Lock() # 全局异步锁
95
95
 
96
96
 
97
-
98
97
  async def init_aiomysql_pool_async():
99
98
  global aiomysql_pool
100
99
  if aiomysql_pool is None:
@@ -110,6 +109,7 @@ client = None
110
109
  motor_fs_lock = asyncio.Lock() # 全局异步锁
111
110
  _loop_id_mongo = None
112
111
 
112
+
113
113
  async def check_connection(client):
114
114
  try:
115
115
  print("check mongodb client ping")
@@ -142,6 +142,7 @@ async def init_motor_async(uri, db_name, bucket_name, is_reload=False):
142
142
  _loop_id_mongo = id(asyncio.get_running_loop())
143
143
  return motor_fs, client
144
144
 
145
+
145
146
  # async def run_main():
146
147
  # while True:
147
148
  # uri = "mongodb://192.168.98.80:27001/wpdc"
@@ -154,3 +155,20 @@ async def init_motor_async(uri, db_name, bucket_name, is_reload=False):
154
155
  #
155
156
  # if __name__ == "__main__":
156
157
  # asyncio.run(run_main())
158
+
159
+
160
def get_connection(autocommit: bool = True, **overrides) -> "Connection":
    """Open and return a new PyMySQL connection that yields dict rows.

    Args:
        autocommit: Whether the connection commits automatically.
        **overrides: Optional keyword arguments merged over the built-in
            settings before connecting (for example ``host``, ``user``,
            ``password``, ``database``), so callers can supply their own
            target and credentials.

    Returns:
        The connection object returned by ``pymysql.connect``; its cursors are
        ``DictCursor`` unless ``cursorclass`` is overridden.

    Raises:
        Whatever ``pymysql.connect`` raises when the connection cannot be
        established.
    """
    # The return annotation is a string on purpose: ``Connection`` is only
    # imported inside the function, so a bare name would be evaluated when the
    # ``def`` statement runs and fail with NameError.
    import pymysql
    from pymysql import Connection
    from pymysql.cursors import DictCursor

    # SECURITY(review): the address and credentials below are hard-coded in a
    # published package. They are kept as defaults only for backward
    # compatibility; prefer passing real values through ``overrides`` and
    # rotate this password.
    db_conf = {
        "host": "192.168.98.55",
        "port": 4000,
        "user": "dataware_house_baseUser",
        "password": "FF19AF831AEBD580B450B16BF9264200",
        "database": "dataware_house_base",
        "autocommit": autocommit,
        "cursorclass": DictCursor,
    }
    db_conf.update(overrides)
    conn: Connection = pymysql.connect(**db_conf)
    return conn
@@ -1,5 +1,6 @@
1
1
  import re
2
2
 
3
+ import regex
3
4
  import unicodedata
4
5
 
5
6
 
@@ -14,7 +15,7 @@ def is_all_english_chars(s):
14
15
 
15
16
 
16
17
def contains_chinese_chars(s):
    """Return True when *s* contains at least one character matched by the
    ``IsHan`` Unicode property of the ``regex`` module, otherwise False."""
    han_match = regex.search(r"[\p{IsHan}]", s)
    return han_match is not None
18
19
 
19
20
 
20
21
  def is_empty(value):
@@ -128,6 +128,47 @@ def get_alphabetic_ratio(text: str) -> float:
128
128
  return len(alphabetic_chars) / len(clean_text)
129
129
 
130
130
 
131
def get_chinese_ratio(text: str, mode: str = "letters_only") -> float:
    """Compute the share of Han characters in *text* after cleaning it.

    The text is first stripped of every character the chosen *mode* does not
    keep; the ratio is then ``Han characters / remaining characters``.

    Args:
        text: The raw text.
        mode: Which characters survive the cleaning step:
            - "letters_only": letters of any language (default).
            - "letters_numbers": letters plus every Unicode number
              (full-width digits, Roman numerals, ...).
            - "letters_arabic_numbers": letters plus the ASCII digits 0-9.
            - "no_numbers": letters only, with Unicode numbers named
              explicitly in the removal pattern.
              NOTE(review): this looks equivalent to "letters_only", since
              numbers are not letters in the first place - confirm.

    Returns:
        The ratio as a float; ``0.0`` when *text* is empty/falsy or nothing is
        left after cleaning.

    Raises:
        ValueError: If *mode* is not one of the supported values (only checked
            when *text* is non-empty).
    """
    if not text:
        return 0.0

    # Removal pattern per mode: everything matched is deleted from the text.
    removal_patterns = {
        "letters_only": r"[^\p{L}]",
        "letters_numbers": r"[^\p{L}\p{N}]",
        "letters_arabic_numbers": r"[^\p{L}0-9]",
        "no_numbers": r"[^\p{L}]|\p{N}",
    }
    try:
        removal_pattern = removal_patterns[mode]
    except (KeyError, TypeError):
        raise ValueError(f"Unsupported mode: {mode}") from None

    clean_text = regex.sub(removal_pattern, "", text)
    kept_total = len(clean_text)
    if kept_total == 0:
        return 0.0

    han_total = sum(1 for _ in regex.finditer(r"[\p{IsHan}]", clean_text))
    return han_total / kept_total
170
+
171
+
131
172
  class HTMLTextExtractor(HTMLParser):
132
173
  _thread_local = threading.local() # 线程局部存储
133
174
 
File without changes
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.1
2
2
  Name: re_common
3
- Version: 10.0.40
3
+ Version: 10.0.41
4
4
  Summary: a library about all python projects
5
5
  Home-page: https://gitee.com/xujiangios/re-common
6
6
  Author: vic
@@ -11,14 +11,6 @@ Classifier: Operating System :: OS Independent
11
11
  Requires-Python: >=3.6
12
12
  Description-Content-Type: text/markdown
13
13
  License-File: LICENSE
14
- Dynamic: author
15
- Dynamic: author-email
16
- Dynamic: classifier
17
- Dynamic: description
18
- Dynamic: description-content-type
19
- Dynamic: home-page
20
- Dynamic: requires-python
21
- Dynamic: summary
22
14
 
23
15
 
24
16
  这是一个基础类,依赖很多的第三方包,是一个用得到的第三方库的封装,可以在此基础上迅速构建项目
@@ -206,6 +206,9 @@ re_common/v2/baselibrary/tools/data_processer/base.py
206
206
  re_common/v2/baselibrary/tools/data_processer/data_processer.py
207
207
  re_common/v2/baselibrary/tools/data_processer/data_reader.py
208
208
  re_common/v2/baselibrary/tools/data_processer/data_writer.py
209
+ re_common/v2/baselibrary/tools/tree_processor/__init__.py
210
+ re_common/v2/baselibrary/tools/tree_processor/builder.py
211
+ re_common/v2/baselibrary/tools/tree_processor/node.py
209
212
  re_common/v2/baselibrary/utils/BusinessStringUtil.py
210
213
  re_common/v2/baselibrary/utils/__init__.py
211
214
  re_common/v2/baselibrary/utils/api_net_utils.py
@@ -34,7 +34,7 @@ long_description = """
34
34
  """
35
35
  setuptools.setup(
36
36
  name="re_common",
37
- version="10.0.40",
37
+ version="10.0.41",
38
38
  author="vic",
39
39
  author_email="xujiang5@163.com",
40
40
  description="a library about all python projects",
File without changes
File without changes
File without changes