re-common 10.0.40__tar.gz → 10.0.42__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (260) hide show
  1. {re_common-10.0.40/re_common.egg-info → re_common-10.0.42}/PKG-INFO +2 -10
  2. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/business_utils/BusinessStringUtil.py +17 -2
  3. re_common-10.0.42/re_common/v2/baselibrary/s3object/baseaioboto3.py +48 -0
  4. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/data_processer/base.py +2 -2
  5. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/data_processer/data_processer.py +52 -63
  6. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/data_processer/data_reader.py +9 -6
  7. re_common-10.0.42/re_common/v2/baselibrary/tools/dir_file_tools.py +27 -0
  8. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/list_tools.py +18 -1
  9. re_common-10.0.42/re_common/v2/baselibrary/tools/tree_processor/builder.py +25 -0
  10. re_common-10.0.42/re_common/v2/baselibrary/tools/tree_processor/node.py +13 -0
  11. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/api_net_utils.py +49 -21
  12. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/basetime.py +17 -0
  13. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/db.py +19 -1
  14. re_common-10.0.42/re_common/v2/baselibrary/utils/pinyin_utils.py +178 -0
  15. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/string_bool.py +2 -1
  16. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/stringutils.py +41 -0
  17. re_common-10.0.42/re_common/vip/proxy/__init__.py +0 -0
  18. {re_common-10.0.40 → re_common-10.0.42/re_common.egg-info}/PKG-INFO +2 -10
  19. {re_common-10.0.40 → re_common-10.0.42}/re_common.egg-info/SOURCES.txt +6 -0
  20. {re_common-10.0.40 → re_common-10.0.42}/setup.py +1 -1
  21. {re_common-10.0.40 → re_common-10.0.42}/LICENSE +0 -0
  22. {re_common-10.0.40 → re_common-10.0.42}/README.md +0 -0
  23. {re_common-10.0.40 → re_common-10.0.42}/pyproject.toml +0 -0
  24. {re_common-10.0.40 → re_common-10.0.42}/re_common/__init__.py +0 -0
  25. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/__init__.py +0 -0
  26. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/baseabs/__init__.py +0 -0
  27. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/baseabs/baseabs.py +0 -0
  28. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/database/__init__.py +0 -0
  29. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/database/mbuilder.py +0 -0
  30. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/database/moudle.py +0 -0
  31. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/database/msqlite3.py +0 -0
  32. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/database/mysql.py +0 -0
  33. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/database/sql_factory.py +0 -0
  34. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/mthread/MThreadingRun.py +0 -0
  35. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/mthread/MThreadingRunEvent.py +0 -0
  36. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/mthread/__init__.py +0 -0
  37. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/mthread/mythreading.py +0 -0
  38. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/pakge_other/__init__.py +0 -0
  39. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/pakge_other/socks.py +0 -0
  40. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/readconfig/__init__.py +0 -0
  41. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/readconfig/config_factory.py +0 -0
  42. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/readconfig/ini_config.py +0 -0
  43. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/readconfig/toml_config.py +0 -0
  44. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/temporary/__init__.py +0 -0
  45. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/temporary/envdata.py +0 -0
  46. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/__init__.py +0 -0
  47. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/all_requests/__init__.py +0 -0
  48. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/all_requests/aiohttp_request.py +0 -0
  49. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/all_requests/httpx_requet.py +0 -0
  50. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/all_requests/mrequest.py +0 -0
  51. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/all_requests/requests_request.py +0 -0
  52. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/batch_compre/__init__.py +0 -0
  53. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/batch_compre/bijiao_batch.py +0 -0
  54. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/contrast_db3.py +0 -0
  55. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/copy_file.py +0 -0
  56. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/db3_2_sizedb3.py +0 -0
  57. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/foreachgz.py +0 -0
  58. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/get_attr.py +0 -0
  59. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/image_to_pdf.py +0 -0
  60. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/java_code_deal.py +0 -0
  61. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/javacode.py +0 -0
  62. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/mdb_db3.py +0 -0
  63. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/merge_file.py +0 -0
  64. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/merge_gz_file.py +0 -0
  65. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/mhdfstools/__init__.py +0 -0
  66. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +0 -0
  67. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/mhdfstools/hdfst.py +0 -0
  68. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +0 -0
  69. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/mongo_tools.py +0 -0
  70. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/move_file.py +0 -0
  71. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/move_mongo/__init__.py +0 -0
  72. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +0 -0
  73. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/move_mongo/move_mongo_table.py +0 -0
  74. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/move_mongo/use_mttf.py +0 -0
  75. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/move_mongo/use_mv.py +0 -0
  76. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/mpandas/__init__.py +0 -0
  77. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +0 -0
  78. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/mpandas/pandas_visualization.py +0 -0
  79. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/myparsel.py +0 -0
  80. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/rename_dir_file.py +0 -0
  81. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/sequoiadb_utils.py +0 -0
  82. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/split_line_to_many.py +0 -0
  83. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/stringtodicts.py +0 -0
  84. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/tools/workwechant_bot.py +0 -0
  85. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/__init__.py +0 -0
  86. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baseaiohttp.py +0 -0
  87. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baseaiomysql.py +0 -0
  88. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baseallstep.py +0 -0
  89. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baseavro.py +0 -0
  90. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baseboto3.py +0 -0
  91. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basecsv.py +0 -0
  92. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basedict.py +0 -0
  93. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basedir.py +0 -0
  94. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baseencode.py +0 -0
  95. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baseencoding.py +0 -0
  96. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baseesdsl.py +0 -0
  97. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baseexcel.py +0 -0
  98. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baseexcept.py +0 -0
  99. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basefile.py +0 -0
  100. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baseftp.py +0 -0
  101. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basegzip.py +0 -0
  102. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basehdfs.py +0 -0
  103. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basehttpx.py +0 -0
  104. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baseip.py +0 -0
  105. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basejson.py +0 -0
  106. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baselist.py +0 -0
  107. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basemotor.py +0 -0
  108. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basemssql.py +0 -0
  109. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baseodbc.py +0 -0
  110. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basepandas.py +0 -0
  111. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basepeewee.py +0 -0
  112. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basepika.py +0 -0
  113. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basepydash.py +0 -0
  114. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basepymongo.py +0 -0
  115. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basequeue.py +0 -0
  116. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baserar.py +0 -0
  117. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baserequest.py +0 -0
  118. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baseset.py +0 -0
  119. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basesmb.py +0 -0
  120. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basestring.py +0 -0
  121. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basetime.py +0 -0
  122. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basetuple.py +0 -0
  123. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/baseurl.py +0 -0
  124. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/basezip.py +0 -0
  125. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/core/__init__.py +0 -0
  126. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/core/bottomutils.py +0 -0
  127. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/core/mdeprecated.py +0 -0
  128. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/core/mlamada.py +0 -0
  129. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/core/msginfo.py +0 -0
  130. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/core/requests_core.py +0 -0
  131. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/fateadm.py +0 -0
  132. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/importfun.py +0 -0
  133. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/mfaker.py +0 -0
  134. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/my_abc/__init__.py +0 -0
  135. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/my_abc/better_abc.py +0 -0
  136. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/mylogger.py +0 -0
  137. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/myredisclient.py +0 -0
  138. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/pipupgrade.py +0 -0
  139. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/ringlist.py +0 -0
  140. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/version_compare.py +0 -0
  141. {re_common-10.0.40 → re_common-10.0.42}/re_common/baselibrary/utils/ydmhttp.py +0 -0
  142. {re_common-10.0.40 → re_common-10.0.42}/re_common/facade/__init__.py +0 -0
  143. {re_common-10.0.40 → re_common-10.0.42}/re_common/facade/lazy_import.py +0 -0
  144. {re_common-10.0.40 → re_common-10.0.42}/re_common/facade/loggerfacade.py +0 -0
  145. {re_common-10.0.40 → re_common-10.0.42}/re_common/facade/mysqlfacade.py +0 -0
  146. {re_common-10.0.40 → re_common-10.0.42}/re_common/facade/now.py +0 -0
  147. {re_common-10.0.40 → re_common-10.0.42}/re_common/facade/sqlite3facade.py +0 -0
  148. {re_common-10.0.40 → re_common-10.0.42}/re_common/facade/use/__init__.py +0 -0
  149. {re_common-10.0.40 → re_common-10.0.42}/re_common/facade/use/mq_use_facade.py +0 -0
  150. {re_common-10.0.40 → re_common-10.0.42}/re_common/facade/use/proxy_use_facade.py +0 -0
  151. {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/__init__.py +0 -0
  152. {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/base_dict_test.py +0 -0
  153. {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/baseavro_test.py +0 -0
  154. {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/basefile_test.py +0 -0
  155. {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/basemssql_test.py +0 -0
  156. {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/baseodbc_test.py +0 -0
  157. {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/basepandas_test.py +0 -0
  158. {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/get_attr_test/__init__.py +0 -0
  159. {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/get_attr_test/get_attr_test_settings.py +0 -0
  160. {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/get_attr_test/settings.py +0 -0
  161. {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/idencode_test.py +0 -0
  162. {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/iniconfig_test.py +0 -0
  163. {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/ip_test.py +0 -0
  164. {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/merge_file_test.py +0 -0
  165. {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/mfaker_test.py +0 -0
  166. {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/mm3_test.py +0 -0
  167. {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/mylogger_test.py +0 -0
  168. {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/myparsel_test.py +0 -0
  169. {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/mysql_test.py +0 -0
  170. {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/pymongo_test.py +0 -0
  171. {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/split_test.py +0 -0
  172. {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/sqlite3_merge_test.py +0 -0
  173. {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/sqlite3_test.py +0 -0
  174. {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/tomlconfig_test.py +0 -0
  175. {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/use_tools_test/__init__.py +0 -0
  176. {re_common-10.0.40 → re_common-10.0.42}/re_common/libtest/user/__init__.py +0 -0
  177. {re_common-10.0.40 → re_common-10.0.42}/re_common/studio/__init__.py +0 -0
  178. {re_common-10.0.40 → re_common-10.0.42}/re_common/studio/assignment_expressions.py +0 -0
  179. {re_common-10.0.40 → re_common-10.0.42}/re_common/studio/mydash/__init__.py +0 -0
  180. {re_common-10.0.40 → re_common-10.0.42}/re_common/studio/mydash/test1.py +0 -0
  181. {re_common-10.0.40 → re_common-10.0.42}/re_common/studio/pydashstudio/__init__.py +0 -0
  182. {re_common-10.0.40 → re_common-10.0.42}/re_common/studio/pydashstudio/first.py +0 -0
  183. {re_common-10.0.40 → re_common-10.0.42}/re_common/studio/streamlitstudio/__init__.py +0 -0
  184. {re_common-10.0.40 → re_common-10.0.42}/re_common/studio/streamlitstudio/first_app.py +0 -0
  185. {re_common-10.0.40 → re_common-10.0.42}/re_common/studio/streamlitstudio/uber_pickups.py +0 -0
  186. {re_common-10.0.40 → re_common-10.0.42}/re_common/studio/test.py +0 -0
  187. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/__init__.py +0 -0
  188. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/__init__.py +0 -0
  189. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/business_utils/__init__.py +0 -0
  190. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/business_utils/baseencodeid.py +0 -0
  191. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/business_utils/full_doi_path.py +0 -0
  192. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/business_utils/rel_tools.py +0 -0
  193. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/decorators/__init__.py +0 -0
  194. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/decorators/utils.py +0 -0
  195. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/helpers/__init__.py +0 -0
  196. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/helpers/search_packge/NearestNeighbors_test.py +0 -0
  197. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/helpers/search_packge/__init__.py +0 -0
  198. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/helpers/search_packge/fit_text_match.py +0 -0
  199. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/helpers/search_packge/scikit_learn_text_matcher.py +0 -0
  200. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/helpers/search_packge/test.py +0 -0
  201. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/s3object/__init__.py +0 -0
  202. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/s3object/baseboto3.py +0 -0
  203. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/WeChatRobot.py +0 -0
  204. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/__init__.py +0 -0
  205. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/ac_ahocorasick.py +0 -0
  206. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/concurrency.py +0 -0
  207. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/data_processer/__init__.py +0 -0
  208. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/data_processer/data_writer.py +0 -0
  209. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/dict_tools.py +0 -0
  210. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/dolphinscheduler.py +0 -0
  211. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/hdfs_base_processor.py +0 -0
  212. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/hdfs_bulk_processor.py +0 -0
  213. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/hdfs_data_processer.py +0 -0
  214. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/hdfs_line_processor.py +0 -0
  215. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/resume_tracker.py +0 -0
  216. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/search_hash_tools.py +0 -0
  217. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/text_matcher.py +0 -0
  218. {re_common-10.0.40/re_common/v2/baselibrary/utils → re_common-10.0.42/re_common/v2/baselibrary/tools/tree_processor}/__init__.py +0 -0
  219. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/tools/unionfind_tools.py +0 -0
  220. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/BusinessStringUtil.py +0 -0
  221. {re_common-10.0.40/re_common/vip → re_common-10.0.42/re_common/v2/baselibrary/utils}/__init__.py +0 -0
  222. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/author_smi.py +0 -0
  223. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/base_string_similarity.py +0 -0
  224. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/basedict.py +0 -0
  225. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/basehdfs.py +0 -0
  226. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/basepika.py +0 -0
  227. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/elasticsearch.py +0 -0
  228. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/json_cls.py +0 -0
  229. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/mq.py +0 -0
  230. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/n_ary_expression_tree.py +0 -0
  231. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/string_clear.py +0 -0
  232. {re_common-10.0.40 → re_common-10.0.42}/re_common/v2/baselibrary/utils/string_smi.py +0 -0
  233. {re_common-10.0.40/re_common/vip/proxy → re_common-10.0.42/re_common/vip}/__init__.py +0 -0
  234. {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/base_step_process.py +0 -0
  235. {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/baseencodeid.py +0 -0
  236. {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/changetaskname.py +0 -0
  237. {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/core_var.py +0 -0
  238. {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/mmh3Hash.py +0 -0
  239. {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/proxy/allproxys.py +0 -0
  240. {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/proxy/allproxys_thread.py +0 -0
  241. {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/proxy/cnki_proxy.py +0 -0
  242. {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/proxy/kuaidaili.py +0 -0
  243. {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/proxy/proxy_all.py +0 -0
  244. {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/proxy/update_kuaidaili_0.py +0 -0
  245. {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/proxy/wanfang_proxy.py +0 -0
  246. {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/proxy/wp_proxy_all.py +0 -0
  247. {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/read_rawid_to_txt.py +0 -0
  248. {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/title/__init__.py +0 -0
  249. {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/title/transform/TransformBookTitleToZt.py +0 -0
  250. {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/title/transform/TransformConferenceTitleToZt.py +0 -0
  251. {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/title/transform/TransformCstadTitleToZt.py +0 -0
  252. {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/title/transform/TransformJournalTitleToZt.py +0 -0
  253. {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/title/transform/TransformPatentTitleToZt.py +0 -0
  254. {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/title/transform/TransformRegulationTitleToZt.py +0 -0
  255. {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/title/transform/TransformStandardTitleToZt.py +0 -0
  256. {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/title/transform/TransformThesisTitleToZt.py +0 -0
  257. {re_common-10.0.40 → re_common-10.0.42}/re_common/vip/title/transform/__init__.py +0 -0
  258. {re_common-10.0.40 → re_common-10.0.42}/re_common.egg-info/dependency_links.txt +0 -0
  259. {re_common-10.0.40 → re_common-10.0.42}/re_common.egg-info/top_level.txt +0 -0
  260. {re_common-10.0.40 → re_common-10.0.42}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.1
2
2
  Name: re_common
3
- Version: 10.0.40
3
+ Version: 10.0.42
4
4
  Summary: a library about all python projects
5
5
  Home-page: https://gitee.com/xujiangios/re-common
6
6
  Author: vic
@@ -11,14 +11,6 @@ Classifier: Operating System :: OS Independent
11
11
  Requires-Python: >=3.6
12
12
  Description-Content-Type: text/markdown
13
13
  License-File: LICENSE
14
- Dynamic: author
15
- Dynamic: author-email
16
- Dynamic: classifier
17
- Dynamic: description
18
- Dynamic: description-content-type
19
- Dynamic: home-page
20
- Dynamic: requires-python
21
- Dynamic: summary
22
14
 
23
15
 
24
16
  这是一个基础类,依赖很多的第三方包,是一个用得到的第三方库的封装,可以在此基础上迅速构建项目
@@ -1,6 +1,9 @@
1
1
  # 某些业务中的字符串处理 算是特定场景的工具 不算通用工具
2
+ import itertools
2
3
  import re
3
4
 
5
+ from rapidfuzz.fuzz import partial_token_set_ratio
6
+
4
7
  from re_common.v2.baselibrary.utils.author_smi import AuthorRatio
5
8
  from re_common.v2.baselibrary.utils.string_bool import is_all_symbols
6
9
  from re_common.v2.baselibrary.utils.string_clear import rel_clear
@@ -199,7 +202,7 @@ def deal_num(num_str):
199
202
  return num_str.lower().strip()
200
203
 
201
204
 
202
- def clear_author_1st(author_str:str):
205
+ def clear_author_1st(author_str: str):
203
206
  # 清理括号 防止前面流程没有清理干净
204
207
  author_str = re.sub("\\[.*?]", "", author_str)
205
208
  author_str = re.sub("\\(.*?\\)", "", author_str)
@@ -209,6 +212,7 @@ def clear_author_1st(author_str:str):
209
212
 
210
213
  return author_str
211
214
 
215
+
212
216
  def is_same_author(a1, a2):
213
217
  if get_alphabetic_ratio(a1.strip()) > 0.7 and get_alphabetic_ratio(a2.strip()) > 0.7:
214
218
  author_similar_ = AuthorRatio(a1.strip(), a2.strip())
@@ -217,4 +221,15 @@ def is_same_author(a1, a2):
217
221
  else:
218
222
  if rel_clear(a1.strip()) == rel_clear(a2.strip()):
219
223
  return True
220
- return False
224
+ return False
225
+
226
+
227
+ def abs_smi(abs_l1, abs_l2):
228
+ abs_l1 = [cleared for cleared in map(rel_clear, abs_l1) if cleared]
229
+ abs_l2 = [cleared for cleared in map(rel_clear, abs_l2) if cleared]
230
+ lists_max = []
231
+ for abs1, abs2 in list(itertools.product(abs_l1, abs_l2)):
232
+ max_smi = partial_token_set_ratio(abs1, abs2, processor=rel_clear)
233
+ lists_max.append(max_smi)
234
+
235
+ return max(lists_max)
@@ -0,0 +1,48 @@
1
+ import aioboto3
2
+ import aiofiles
3
+ from aiobotocore.config import AioConfig
4
+
5
+
6
+ # config = AioConfig(connect_timeout=600000, read_timeout=600000, retries={'max_attempts': 3},
7
+ # max_pool_connections=10)
8
+
9
+ class BaseAioBoto3(object):
10
+
11
+ def __init__(self, aws_access_key_id, aws_secret_access_key, endpoint_url,
12
+ config=AioConfig(max_pool_connections=10)):
13
+ self.aws_access_key_id = aws_access_key_id
14
+ self.aws_secret_access_key = aws_secret_access_key
15
+ self.endpoint_url = endpoint_url
16
+ self.config = config
17
+ self.boto_session = None
18
+
19
+ async def initialize_class_variable(self):
20
+ if self.boto_session is None:
21
+ self.boto_session = aioboto3.Session(
22
+ aws_access_key_id=self.aws_access_key_id,
23
+ aws_secret_access_key=self.aws_secret_access_key,
24
+ )
25
+
26
+ async def read_minio_data(self, bucket, key):
27
+ await self.initialize_class_variable()
28
+ async with self.boto_session.client("s3", endpoint_url=self.endpoint_url, config=self.config) as s3:
29
+ s3_ob = await s3.get_object(Bucket=bucket, Key=key)
30
+ result = await s3_ob["Body"].read()
31
+ return result
32
+
33
+ # 异步下载大文件
34
+ async def download_file(self, bucket: str, key: str, local_path: str):
35
+ await self.initialize_class_variable()
36
+ async with self.boto_session.client("s3", endpoint_url=self.endpoint_url, config=self.config) as s3:
37
+ response = await s3.get_object(Bucket=bucket, Key=key)
38
+ body = response["Body"]
39
+
40
+ # 用异步方式写入本地
41
+ async with aiofiles.open(local_path, "wb") as f:
42
+ while True:
43
+ chunk = await body.read(10 * 1024 * 1024) # 每次读 10MB
44
+ if not chunk:
45
+ break
46
+ await f.write(chunk)
47
+
48
+ return local_path
@@ -4,9 +4,9 @@ from typing import List, Generator
4
4
 
5
5
  class BaseFileReader(ABC):
6
6
 
7
- def __init__(self, batch_size: int = 10000):
7
+ def __init__(self, batch_size: int = 10000, read_model: int = 1):
8
8
  self.batch_size = batch_size
9
- self.read_model = 1
9
+ self.read_model = read_model
10
10
 
11
11
  @abstractmethod
12
12
  def list_files(self, path: str) -> List[str]:
@@ -43,6 +43,15 @@ class DatabaseHandler:
43
43
  )
44
44
  conn.commit()
45
45
 
46
+ def get_processed_files_count(self):
47
+ """查看db3存储了多少成功的记录"""
48
+ with FileLock(self.lock_file):
49
+ with sqlite3.connect(self.db_file) as conn:
50
+ cursor = conn.cursor()
51
+ cursor.execute("SELECT COUNT(*) FROM processed_files")
52
+ count = cursor.fetchone()[0]
53
+ return count
54
+
46
55
  def save_processed_files_many(self, file_paths):
47
56
  """批量保存处理过的文件路径"""
48
57
  if not file_paths:
@@ -110,6 +119,21 @@ class DatabaseHandler:
110
119
  print(f"伪造处理记录时出错: {str(e)}")
111
120
 
112
121
 
122
+ def on_retry(retry_state):
123
+ # 每次抛错进入该函数打印消息
124
+ exc = retry_state.outcome.exception()
125
+ tb = ''.join(traceback.format_exception(type(exc), exc, exc.__traceback__))
126
+ print(tb)
127
+ print(
128
+ f"处理文件 {retry_state.args[0]} 时发生错误: {exc},正在重试 {retry_state.attempt_number}")
129
+
130
+
131
+ def on_retry_error(retry_state):
132
+ # 最后抛错后调用
133
+ print(f"处理文件 {retry_state.args[0]} 失败,达到重试上限")
134
+ return False
135
+
136
+
113
137
  class DataProcessor:
114
138
  def __init__(
115
139
  self,
@@ -249,38 +273,6 @@ class DataProcessor:
249
273
  for file_path in all_files:
250
274
  yield file_path
251
275
 
252
- @retry(stop=stop_after_attempt(3),
253
- wait=wait_random(min=10, max=30),
254
- retry=retry_if_result(lambda result: not result), # 如果返回值是 False(失败),则重试 最后会抛出一个默认错误tenacity.RetryError:
255
- reraise=True)
256
- async def _batch_process_file(self, hdfs_file_path: str, process_func: Callable[[str], Any],
257
- write_dir: str = None):
258
- """批量更新所有 gz 文件"""
259
- # all_succeed = True
260
- # for hdfs_file_path in self.get_file_list(hdfs_dir):
261
- # if self.db_handler.is_file_processed(hdfs_file_path):
262
- # print(f"跳过已处理文件: {hdfs_file_path}")
263
- # continue # 如果文件已处理,跳过
264
- # succeed = await self.retry_process_file(hdfs_file_path, process_func, write_dir) # 处理文件
265
- # if succeed is False:
266
- # all_succeed = False
267
- #
268
- # if all_succeed:
269
- # # 处理完成后删除数据库文件
270
- # try:
271
- # if os.path.exists(self.db_file):
272
- # os.remove(self.db_file)
273
- # print(f"已删除断点重试文件: {self.db_file}")
274
- # return True
275
- # except Exception as e:
276
- # print(f"删除断点重试文件失败: {e}")
277
- # return False
278
- if self.db_handler.is_file_processed(hdfs_file_path):
279
- print(f"跳过已处理文件: {hdfs_file_path}")
280
- return True # 如果文件已处理,跳过
281
- succeed = await self.retry_process_file(hdfs_file_path, process_func, write_dir) # 处理文件
282
- return succeed
283
-
284
276
  async def process_file_bulk(self, hdfs_file_path, process_func, write_dir):
285
277
  """按批次处理单个文件,批量数据传递给处理函数"""
286
278
  # 获取文件的数据总量
@@ -391,36 +383,6 @@ class DataProcessor:
391
383
  succeed = await self._batch_process_file(hdfs_file_path, process_func, write_dir)
392
384
  if succeed is False:
393
385
  all_succeed = False
394
- if all_succeed:
395
- # 处理完成后删除数据库文件
396
- try:
397
- if os.path.exists(self.db_file):
398
- os.remove(self.db_file)
399
- print(f"已删除断点重试文件: {self.db_file}")
400
- return True
401
- except Exception as e:
402
- print(f"删除断点重试文件失败: {e}")
403
- return False
404
-
405
- @retry(stop=stop_after_attempt(3),
406
- wait=wait_random(min=10, max=30),
407
- retry=retry_if_result(lambda result: not result), # 如果返回值是 False(失败),则重试 最后会抛出一个默认错误tenacity.RetryError:
408
- reraise=True)
409
- async def _batch_process_file_bulk(self, hdfs_file_path: str, process_func: Callable[[List[str]], Any],
410
- write_dir: str = None):
411
- """批量处理 gz 文件中的数据"""
412
- # 获取所有文件
413
- # all_succeed = True
414
- # for hdfs_file_path in self.get_file_list(hdfs_dir):
415
- # # 查看是否跳过文件
416
- # if self.db_handler.is_file_processed(hdfs_file_path):
417
- # print(f"跳过已处理文件: {hdfs_file_path}")
418
- # continue # 跳过已处理文件
419
- # # 开始批量处理文件
420
- # succeed = await self.retry_process_file_bulk(hdfs_file_path, process_func, write_dir)
421
- # if succeed is False:
422
- # all_succeed = False
423
- #
424
386
  # if all_succeed:
425
387
  # # 处理完成后删除数据库文件
426
388
  # try:
@@ -430,7 +392,17 @@ class DataProcessor:
430
392
  # return True
431
393
  # except Exception as e:
432
394
  # print(f"删除断点重试文件失败: {e}")
433
- # return False
395
+ return all_succeed
396
+
397
+ @retry(stop=stop_after_attempt(3),
398
+ wait=wait_random(min=10, max=30),
399
+ # retry=retry_if_result(lambda result: not result), # 如果返回值是 False(失败),则重试 最后会抛出一个默认错误tenacity.RetryError:
400
+ before_sleep=on_retry, # 每次抛错后使用
401
+ retry_error_callback=on_retry_error, # 如果最后没成功 返回 False
402
+ reraise=True) # 如果函数一直失败,重试结束时会 重新抛出最后一次调用时的原始异常。
403
+ async def _batch_process_file_bulk(self, hdfs_file_path: str, process_func: Callable[[List[str]], Any],
404
+ write_dir: str = None):
405
+ """批量处理 gz 文件中的数据"""
434
406
  # 查看是否跳过文件
435
407
  if self.db_handler.is_file_processed(hdfs_file_path):
436
408
  print(f"跳过已处理文件: {hdfs_file_path}")
@@ -439,6 +411,21 @@ class DataProcessor:
439
411
  succeed = await self.retry_process_file_bulk(hdfs_file_path, process_func, write_dir)
440
412
  return succeed
441
413
 
414
@retry(stop=stop_after_attempt(3),
       wait=wait_random(min=10, max=30),
       # Result-based retrying (retry_if_result on a falsy return value) is
       # intentionally not configured here; no ``retry=`` predicate is passed.
       before_sleep=on_retry,  # invoked before sleeping between attempts
       retry_error_callback=on_retry_error,  # its return value (False) is used when attempts run out
       # NOTE(review): with retry_error_callback set, tenacity appears to return
       # the callback's value instead of raising, so reraise=True likely has no
       # effect here -- confirm against the tenacity version in use.
       reraise=True)
async def _batch_process_file(self, hdfs_file_path: str, process_func: Callable[[str], Any],
                              write_dir: str = None):
    """Process one file unless the checkpoint DB already marks it as done.

    Args:
        hdfs_file_path: Path of the file to process.
        process_func: Callback forwarded to ``retry_process_file``.
        write_dir: Optional output directory forwarded to ``retry_process_file``.

    Returns:
        ``True`` when the file was already processed and is skipped; otherwise
        whatever ``retry_process_file`` returns.
    """
    already_done = self.db_handler.is_file_processed(hdfs_file_path)
    if not already_done:
        # Not checkpointed yet: do the actual work and hand back its outcome.
        return await self.retry_process_file(hdfs_file_path, process_func, write_dir)

    print(f"跳过已处理文件: {hdfs_file_path}")
    return True
428
+
442
429
 
443
430
  # 全局变量,每个进程独立持有
444
431
  _processor: DataProcessor | None = None
@@ -506,3 +493,5 @@ def run_worker_many(hdfs_dir: str, process_func: Callable[[List[str]], Any] | Ca
506
493
  for result in results:
507
494
  if result:
508
495
  print(result)
496
+ db3_count = processor.db_handler.get_processed_files_count()
497
+ print(f"db3文件数据量{db3_count},文件实际数据量{len(all_file)},是否完成全部转移: {db3_count == len(all_file)}")
@@ -12,8 +12,9 @@ from re_common.v2.baselibrary.tools.data_processer.base import BaseFileReader
12
12
 
13
13
 
14
14
  class HDFSFileReader(BaseFileReader):
15
- def __init__(self, batch_size: int = 1000, hdfs_url: str = "http://VIP-DC-MASTER-2:9870", hdfs_user: str = "root"):
16
- super().__init__(batch_size)
15
+ def __init__(self, batch_size: int = 1000, read_model: int = 1, hdfs_url: str = "http://VIP-DC-MASTER-2:9870",
16
+ hdfs_user: str = "root"):
17
+ super().__init__(batch_size, read_model)
17
18
  self.client = InsecureClient(hdfs_url, user=hdfs_user)
18
19
 
19
20
  def list_files(self, path: str) -> List[str]:
@@ -48,8 +49,9 @@ class HDFSFileReader(BaseFileReader):
48
49
 
49
50
 
50
51
  class HDFSGZFileReader(BaseFileReader):
51
- def __init__(self, batch_size: int = 1000, hdfs_url: str = "http://VIP-DC-MASTER-2:9870", hdfs_user: str = "root"):
52
- super().__init__(batch_size)
52
+ def __init__(self, batch_size: int = 1000, read_model: int = 1, hdfs_url: str = "http://VIP-DC-MASTER-2:9870",
53
+ hdfs_user: str = "root"):
54
+ super().__init__(batch_size, read_model)
53
55
  self.hdfs_url = hdfs_url
54
56
  self.hdfs_user = hdfs_user
55
57
  self.client = None
@@ -99,8 +101,9 @@ class HDFSGZFileReader(BaseFileReader):
99
101
 
100
102
 
101
103
  class HDFSParquetFileReader(BaseFileReader):
102
- def __init__(self, batch_size: int = 1000, hdfs_url: str = "http://VIP-DC-MASTER-2:9870", hdfs_user: str = "root"):
103
- super().__init__(batch_size)
104
+ def __init__(self, batch_size: int = 1000, read_model: int = 1, hdfs_url: str = "http://VIP-DC-MASTER-2:9870",
105
+ hdfs_user: str = "root"):
106
+ super().__init__(batch_size, read_model)
104
107
  self.client = InsecureClient(hdfs_url, user=hdfs_user)
105
108
 
106
109
  def list_files(self, path: str) -> List[str]:
@@ -0,0 +1,27 @@
1
+ import json
2
+ import os
3
+
4
+
5
def scan_dir_fast(path):
    """Return path and size for every regular file directly inside *path*.

    Only the immediate children of *path* are inspected (no recursion);
    entries for which ``is_file()`` is false, such as sub-directories, are
    skipped.

    Args:
        path: Directory to scan.

    Returns:
        list[dict]: One ``{"path": ..., "size": ...}`` dict per file, in the
        order ``os.scandir`` yields them.
    """
    with os.scandir(path) as directory:
        return [
            {"path": item.path, "size": item.stat().st_size}
            for item in directory
            if item.is_file()
        ]
16
+
17
+
18
def scan_dir(dir_name, result_file):
    """Walk *dir_name* recursively and append file info to *result_file*.

    For every directory visited, the directory path is printed and one JSON
    object per file (as produced by ``scan_dir_fast``) is appended to
    *result_file*, one object per line, UTF-8 encoded.

    Args:
        dir_name: Root directory to walk, e.g. ``/share/fulltext/errors``.
        result_file: Output file opened in append mode,
            e.g. ``file_info_errors.txt``.
    """
    for current_dir, _subdirs, _names in os.walk(dir_name):
        print(current_dir)
        records = scan_dir_fast(current_dir)
        # The output is reopened in append mode for each directory visited.
        with open(result_file, "a", encoding="utf-8") as out:
            out.writelines(
                json.dumps(record, ensure_ascii=False) + "\n"
                for record in records
                if record
            )
@@ -1,4 +1,5 @@
1
1
  import itertools
2
+ from collections import Counter
2
3
  from typing import List, Any, Tuple
3
4
 
4
5
 
@@ -67,4 +68,20 @@ def list_to_dict(list_data,key_name):
67
68
 
68
69
def split_list_by_step(lst, step=100):
    """Split a flat list into consecutive chunks of at most *step* items.

    Args:
        lst: The sequence to split.
        step: Maximum chunk length (default 100).

    Returns:
        list: The chunks in order; the last one may be shorter than *step*.
    """
    chunks = []
    for start in range(0, len(lst), step):
        chunks.append(lst[start:start + step])
    return chunks
72
+
73
+
74
def list_diff(l1, l2):
    """Compare two lists as multisets, keeping duplicate counts.

    ``Counter`` defines ``&`` as the element-wise minimum of counts and ``-``
    as a subtraction that drops non-positive counts, which yields a
    duplicate-aware intersection and difference.

    Args:
        l1: First list of hashable items.
        l2: Second list of hashable items.

    Returns:
        tuple[list, list, list]: ``(common, extra1, extra2)`` where *common*
        holds the items present in both lists, *extra1* the surplus items of
        *l1*, and *extra2* the surplus items of *l2*.
    """
    left = Counter(l1)
    right = Counter(l2)

    shared = left & right        # items present in both
    only_left = left - right     # surplus of l1
    only_right = right - left    # surplus of l2

    return (
        list(shared.elements()),
        list(only_left.elements()),
        list(only_right.elements()),
    )
@@ -0,0 +1,25 @@
1
+ from re_common.v2.baselibrary.tools.tree_processor.node import TreeNode
2
+
3
+
4
def build_forest(node_list):
    """Build a forest of ``TreeNode`` objects from flat records.

    Args:
        node_list: Iterable of ``(cid, pid, count)`` triples, where *cid* is
            the node id, *pid* the parent id (or ``None``), and *count* the
            value stored on the node. It is iterated twice, so it must be
            re-iterable (e.g. a list).

    Returns:
        list: Every node that ended up without a parent. A node whose *pid* is
        ``None`` or does not match any *cid* in *node_list* is a root.
    """
    # Pass 1: create one node per record, indexed by its id. A repeated cid
    # replaces the earlier node for that id.
    nodes = {cid: TreeNode(cid, count) for cid, _pid, count in node_list}

    # Pass 2: wire up parent/child links now that every node exists.
    for cid, pid, _count in node_list:
        if pid is not None and pid in nodes:
            parent = nodes[pid]
            child = nodes[cid]
            parent.children.append(child)
            child.parent = parent

    # Pass 3: whatever still has no parent is the root of a tree.
    return [node for node in nodes.values() if node.parent is None]
@@ -0,0 +1,13 @@
1
class TreeNode:
    """A tree node holding an id, a count, and parent/children links."""

    def __init__(self, cid, count):
        # Links start empty; they are filled in by add_child or by the caller.
        self.parent = None
        self.children = []
        self.id = cid
        self.count = count

    def add_child(self, child):
        """Attach *child* under this node and set its ``parent`` to this node."""
        child.parent = self
        self.children.append(child)

    def is_leaf(self):
        """Return ``True`` when this node has no children."""
        return not self.children
@@ -1,8 +1,11 @@
1
1
  import atexit
2
+ import os
2
3
  import sys
3
4
  import asyncio
5
+ import traceback
6
+
4
7
  import aiohttp
5
- from typing import Optional
8
+ from typing import Optional, Union
6
9
 
7
10
  from tenacity import retry, stop_after_attempt, wait_random
8
11
 
@@ -62,6 +65,13 @@ def on_retry_error(retry_state):
62
65
 
63
66
  def on_retry(retry_state):
64
67
  # 每次抛错进入该函数打印消息
68
+
69
+ # # 获取函数调用参数
70
+ # args = retry_state.args
71
+ # kwargs = retry_state.kwargs
72
+ #
73
+ # print(id(args[0]._get_session()))
74
+
65
75
  print(
66
76
  f"[HTTP 请求重试]"
67
77
  f"当前重试 : 第 {retry_state.attempt_number} 次"
@@ -84,6 +94,8 @@ class ApiNetUtils:
84
94
  _conn: Optional[aiohttp.TCPConnector] = None
85
95
  _session: Optional[aiohttp.ClientSession] = None
86
96
  _close_registered: bool = False # 确保清理函数只注册一次
97
+ _pid: Optional[int] = None # 当前进程的 PID
98
+ lock = asyncio.Lock()
87
99
 
88
100
  @classmethod
89
101
  async def _get_connector(cls) -> aiohttp.TCPConnector:
@@ -96,9 +108,9 @@ class ApiNetUtils:
96
108
  cls._conn = aiohttp.TCPConnector(
97
109
  limit=50, # 最大连接数
98
110
  ssl=False, # 禁用SSL验证(按需开启)
99
- force_close=True, # 保持连接活跃
100
- # enable_cleanup_closed=True, # 自动清理关闭的连接 #
101
- # keepalive_timeout=4.99 # 比服务器的5s 小一点
111
+ force_close=False, # 保持连接活跃
112
+ enable_cleanup_closed=True, # 自动清理关闭的连接 #
113
+ keepalive_timeout=4.99 # 比服务器的5s 小一点
102
114
  )
103
115
  return cls._conn
104
116
 
@@ -108,25 +120,41 @@ class ApiNetUtils:
108
120
  获取共享会话(线程安全的延迟初始化)
109
121
  包含自动注册清理机制
110
122
  """
111
- if cls._session is None or cls._session.closed or cls.is_loop_closed(cls._session):
112
- if cls._session:
113
- await cls.close()
114
- # 获取连接器(会自动初始化)
115
- connector = await cls._get_connector()
116
-
117
- # 强制获取新的事件循环
118
- loop = asyncio.get_event_loop()
123
+ async with cls.lock:
124
+ current_pid = os.getpid()
125
+ if cls._pid != current_pid:
126
+ # 新进程,重新初始化
127
+ if cls._session:
128
+ await cls.close()
129
+ cls._pid = current_pid
130
+
131
+ if cls._session is None or cls._session.closed or cls.is_loop_closed(cls._session):
132
+ if cls._session:
133
+ await cls.close()
134
+ # 获取连接器(会自动初始化)
135
+ connector = await cls._get_connector()
136
+
137
+ # 强制获取新的事件循环
138
+ loop = asyncio.get_event_loop()
139
+
140
+ timeout = aiohttp.ClientTimeout(
141
+ total=120, # 整个请求最多 30 秒
142
+ connect=10, # 最多 5 秒连接
143
+ sock_connect=10,
144
+ sock_read=110, # 最多 20 秒读取响应数据
145
+ )
119
146
 
120
- # 创建新会话
121
- cls._session = aiohttp.ClientSession(
122
- connector=connector,
123
- timeout=aiohttp.ClientTimeout(total=30), # 默认30秒超时
124
- loop=loop) # 显式指定事件循环
147
+ # 创建新会话
148
+ cls._session = aiohttp.ClientSession(
149
+ connector=connector,
150
+ timeout=timeout, # 默认30秒超时
151
+ loop=loop,
152
+ ) # 显式指定事件循环
125
153
 
126
- # # 注册退出时的清理钩子
127
- cls._register_cleanup()
154
+ # # 注册退出时的清理钩子
155
+ cls._register_cleanup()
128
156
 
129
- return cls._session
157
+ return cls._session
130
158
 
131
159
  @staticmethod
132
160
  def is_loop_closed(session: aiohttp.ClientSession) -> bool:
@@ -135,7 +163,7 @@ class ApiNetUtils:
135
163
  """
136
164
  loop = session._loop # 获取会话绑定的事件循环
137
165
  if loop.is_closed():
138
- # print("Event loop is closed")
166
+ print("Event loop is closed")
139
167
  return True
140
168
  # print("Event loop not is closed")
141
169
  return False
@@ -75,3 +75,20 @@ class BaseTime(object):
75
75
  current_time = BaseTime.get_current_beijing_time()
76
76
  last_time = BaseTime.parse_beijing_time(last_time_str)
77
77
  return current_time.hour != last_time.hour
78
+
79
+ @staticmethod
80
+ def is_weekday(num_weekday: int) -> bool:
81
+ """
82
+ 判断当前日期是否为指定星期。
83
+
84
+ 参数:
85
+ num_weekday (int): 表示星期的数字(1=星期一, 2=星期二, ..., 7=星期日)。
86
+
87
+ 返回:
88
+ bool: 如果当前日期不是指定的星期,则返回 True;否则返回 False。
89
+
90
+ 示例:
91
+ 如果 num_weekday=6(星期六),而今天是星期五(weekday()=4),则返回 True。
92
+ """
93
+ current_weekday = datetime.now().weekday() # 获取当前星期(0=星期一, 1=星期二, ..., 6=星期日)
94
+ return current_weekday != num_weekday - 1
@@ -94,7 +94,6 @@ aiomysql_pool = None
94
94
  pool_lock = asyncio.Lock() # 全局异步锁
95
95
 
96
96
 
97
-
98
97
  async def init_aiomysql_pool_async():
99
98
  global aiomysql_pool
100
99
  if aiomysql_pool is None:
@@ -110,6 +109,7 @@ client = None
110
109
  motor_fs_lock = asyncio.Lock() # 全局异步锁
111
110
  _loop_id_mongo = None
112
111
 
112
+
113
113
  async def check_connection(client):
114
114
  try:
115
115
  print("check mongodb client ping")
@@ -142,6 +142,7 @@ async def init_motor_async(uri, db_name, bucket_name, is_reload=False):
142
142
  _loop_id_mongo = id(asyncio.get_running_loop())
143
143
  return motor_fs, client
144
144
 
145
+
145
146
  # async def run_main():
146
147
  # while True:
147
148
  # uri = "mongodb://192.168.98.80:27001/wpdc"
@@ -154,3 +155,20 @@ async def init_motor_async(uri, db_name, bucket_name, is_reload=False):
154
155
  #
155
156
  # if __name__ == "__main__":
156
157
  # asyncio.run(run_main())
158
+
159
+
160
def get_connection(autocommit: bool = True) -> "pymysql.connections.Connection":
    """Open a new pymysql connection to the ``dataware_house_base`` database.

    The return annotation is a string on purpose: pymysql is imported lazily
    inside the function, so a bare ``Connection`` annotation would be
    evaluated at definition time, before the name exists.

    Args:
        autocommit: Whether the connection commits each statement
            automatically (default ``True``).

    Returns:
        A connected ``pymysql`` connection whose cursors return rows as dicts
        (``DictCursor``). The caller is responsible for closing it.
    """
    # Imported lazily so that importing this module does not require pymysql.
    import pymysql
    from pymysql.cursors import DictCursor

    # NOTE(review): host and credentials are hard-coded in source; consider
    # loading them from configuration or the environment instead.
    db_conf = {
        "host": "192.168.98.55",
        "port": 4000,
        "user": "dataware_house_baseUser",
        "password": "FF19AF831AEBD580B450B16BF9264200",
        "database": "dataware_house_base",
        "autocommit": autocommit,
        "cursorclass": DictCursor,
    }
    return pymysql.connect(**db_conf)