re-common 2.0.1__py3-none-any.whl → 10.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194) hide show
  1. re_common/baselibrary/__init__.py +4 -0
  2. re_common/baselibrary/baseabs/__init__.py +7 -0
  3. re_common/baselibrary/baseabs/baseabs.py +26 -0
  4. re_common/baselibrary/database/__init__.py +0 -0
  5. re_common/baselibrary/database/mbuilder.py +132 -0
  6. re_common/baselibrary/database/moudle.py +93 -0
  7. re_common/baselibrary/database/msqlite3.py +194 -0
  8. re_common/baselibrary/database/mysql.py +169 -0
  9. re_common/baselibrary/database/sql_factory.py +26 -0
  10. re_common/baselibrary/mthread/MThreadingRun.py +486 -0
  11. re_common/baselibrary/mthread/MThreadingRunEvent.py +349 -0
  12. re_common/baselibrary/mthread/__init__.py +3 -0
  13. re_common/baselibrary/mthread/mythreading.py +695 -0
  14. re_common/baselibrary/pakge_other/__init__.py +0 -0
  15. re_common/baselibrary/pakge_other/socks.py +404 -0
  16. re_common/baselibrary/readconfig/__init__.py +0 -0
  17. re_common/baselibrary/readconfig/config_factory.py +18 -0
  18. re_common/baselibrary/readconfig/ini_config.py +317 -0
  19. re_common/baselibrary/readconfig/toml_config.py +49 -0
  20. re_common/baselibrary/temporary/__init__.py +0 -0
  21. re_common/baselibrary/temporary/envdata.py +36 -0
  22. re_common/baselibrary/tools/__init__.py +0 -0
  23. re_common/baselibrary/tools/all_requests/__init__.py +0 -0
  24. re_common/baselibrary/tools/all_requests/aiohttp_request.py +118 -0
  25. re_common/baselibrary/tools/all_requests/httpx_requet.py +102 -0
  26. re_common/baselibrary/tools/all_requests/mrequest.py +412 -0
  27. re_common/baselibrary/tools/all_requests/requests_request.py +81 -0
  28. re_common/baselibrary/tools/batch_compre/__init__.py +0 -0
  29. re_common/baselibrary/tools/batch_compre/bijiao_batch.py +31 -0
  30. re_common/baselibrary/tools/contrast_db3.py +123 -0
  31. re_common/baselibrary/tools/copy_file.py +39 -0
  32. re_common/baselibrary/tools/db3_2_sizedb3.py +102 -0
  33. re_common/baselibrary/tools/foreachgz.py +40 -0
  34. re_common/baselibrary/tools/get_attr.py +11 -0
  35. re_common/baselibrary/tools/image_to_pdf.py +62 -0
  36. re_common/baselibrary/tools/java_code_deal.py +139 -0
  37. re_common/baselibrary/tools/javacode.py +79 -0
  38. re_common/baselibrary/tools/mdb_db3.py +48 -0
  39. re_common/baselibrary/tools/merge_file.py +171 -0
  40. re_common/baselibrary/tools/merge_gz_file.py +165 -0
  41. re_common/baselibrary/tools/mhdfstools/__init__.py +0 -0
  42. re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +42 -0
  43. re_common/baselibrary/tools/mhdfstools/hdfst.py +42 -0
  44. re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +38 -0
  45. re_common/baselibrary/tools/mongo_tools.py +50 -0
  46. re_common/baselibrary/tools/move_file.py +170 -0
  47. re_common/baselibrary/tools/move_mongo/__init__.py +0 -0
  48. re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +63 -0
  49. re_common/baselibrary/tools/move_mongo/move_mongo_table.py +354 -0
  50. re_common/baselibrary/tools/move_mongo/use_mttf.py +18 -0
  51. re_common/baselibrary/tools/move_mongo/use_mv.py +93 -0
  52. re_common/baselibrary/tools/mpandas/__init__.py +0 -0
  53. re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +125 -0
  54. re_common/baselibrary/tools/mpandas/pandas_visualization.py +8 -0
  55. re_common/baselibrary/tools/myparsel.py +104 -0
  56. re_common/baselibrary/tools/rename_dir_file.py +37 -0
  57. re_common/baselibrary/tools/sequoiadb_utils.py +398 -0
  58. re_common/baselibrary/tools/split_line_to_many.py +25 -0
  59. re_common/baselibrary/tools/stringtodicts.py +33 -0
  60. re_common/baselibrary/tools/workwechant_bot.py +84 -0
  61. re_common/baselibrary/utils/__init__.py +0 -0
  62. re_common/baselibrary/utils/baseaiohttp.py +296 -0
  63. re_common/baselibrary/utils/baseaiomysql.py +87 -0
  64. re_common/baselibrary/utils/baseallstep.py +191 -0
  65. re_common/baselibrary/utils/baseavro.py +19 -0
  66. re_common/baselibrary/utils/baseboto3.py +291 -0
  67. re_common/baselibrary/utils/basecsv.py +32 -0
  68. re_common/baselibrary/utils/basedict.py +133 -0
  69. re_common/baselibrary/utils/basedir.py +241 -0
  70. re_common/baselibrary/utils/baseencode.py +351 -0
  71. re_common/baselibrary/utils/baseencoding.py +29 -0
  72. re_common/baselibrary/utils/baseesdsl.py +86 -0
  73. re_common/baselibrary/utils/baseexcel.py +264 -0
  74. re_common/baselibrary/utils/baseexcept.py +109 -0
  75. re_common/baselibrary/utils/basefile.py +654 -0
  76. re_common/baselibrary/utils/baseftp.py +214 -0
  77. re_common/baselibrary/utils/basegzip.py +60 -0
  78. re_common/baselibrary/utils/basehdfs.py +135 -0
  79. re_common/baselibrary/utils/basehttpx.py +268 -0
  80. re_common/baselibrary/utils/baseip.py +87 -0
  81. re_common/baselibrary/utils/basejson.py +2 -0
  82. re_common/baselibrary/utils/baselist.py +32 -0
  83. re_common/baselibrary/utils/basemotor.py +190 -0
  84. re_common/baselibrary/utils/basemssql.py +98 -0
  85. re_common/baselibrary/utils/baseodbc.py +113 -0
  86. re_common/baselibrary/utils/basepandas.py +302 -0
  87. re_common/baselibrary/utils/basepeewee.py +11 -0
  88. re_common/baselibrary/utils/basepika.py +180 -0
  89. re_common/baselibrary/utils/basepydash.py +143 -0
  90. re_common/baselibrary/utils/basepymongo.py +230 -0
  91. re_common/baselibrary/utils/basequeue.py +22 -0
  92. re_common/baselibrary/utils/baserar.py +57 -0
  93. re_common/baselibrary/utils/baserequest.py +279 -0
  94. re_common/baselibrary/utils/baseset.py +8 -0
  95. re_common/baselibrary/utils/basesmb.py +403 -0
  96. re_common/baselibrary/utils/basestring.py +382 -0
  97. re_common/baselibrary/utils/basetime.py +320 -0
  98. re_common/baselibrary/utils/basetuple.py +0 -0
  99. re_common/baselibrary/utils/baseurl.py +121 -0
  100. re_common/baselibrary/utils/basezip.py +57 -0
  101. re_common/baselibrary/utils/core/__init__.py +8 -0
  102. re_common/baselibrary/utils/core/bottomutils.py +18 -0
  103. re_common/baselibrary/utils/core/mdeprecated.py +327 -0
  104. re_common/baselibrary/utils/core/mlamada.py +16 -0
  105. re_common/baselibrary/utils/core/msginfo.py +25 -0
  106. re_common/baselibrary/utils/core/requests_core.py +103 -0
  107. re_common/baselibrary/utils/fateadm.py +429 -0
  108. re_common/baselibrary/utils/importfun.py +123 -0
  109. re_common/baselibrary/utils/mfaker.py +57 -0
  110. re_common/baselibrary/utils/my_abc/__init__.py +3 -0
  111. re_common/baselibrary/utils/my_abc/better_abc.py +32 -0
  112. re_common/baselibrary/utils/mylogger.py +414 -0
  113. re_common/baselibrary/utils/myredisclient.py +861 -0
  114. re_common/baselibrary/utils/pipupgrade.py +21 -0
  115. re_common/baselibrary/utils/ringlist.py +85 -0
  116. re_common/baselibrary/utils/version_compare.py +36 -0
  117. re_common/baselibrary/utils/ydmhttp.py +126 -0
  118. re_common/facade/__init__.py +1 -0
  119. re_common/facade/lazy_import.py +11 -0
  120. re_common/facade/loggerfacade.py +25 -0
  121. re_common/facade/mysqlfacade.py +467 -0
  122. re_common/facade/now.py +31 -0
  123. re_common/facade/sqlite3facade.py +257 -0
  124. re_common/facade/use/__init__.py +0 -0
  125. re_common/facade/use/mq_use_facade.py +83 -0
  126. re_common/facade/use/proxy_use_facade.py +20 -0
  127. re_common/libtest/__init__.py +0 -0
  128. re_common/libtest/base_dict_test.py +19 -0
  129. re_common/libtest/baseavro_test.py +13 -0
  130. re_common/libtest/basefile_test.py +14 -0
  131. re_common/libtest/basemssql_test.py +77 -0
  132. re_common/libtest/baseodbc_test.py +8 -0
  133. re_common/libtest/basepandas_test.py +38 -0
  134. re_common/libtest/get_attr_test/__init__.py +0 -0
  135. re_common/libtest/get_attr_test/get_attr_test_settings.py +14 -0
  136. re_common/libtest/get_attr_test/settings.py +55 -0
  137. re_common/libtest/idencode_test.py +54 -0
  138. re_common/libtest/iniconfig_test.py +35 -0
  139. re_common/libtest/ip_test.py +35 -0
  140. re_common/libtest/merge_file_test.py +20 -0
  141. re_common/libtest/mfaker_test.py +9 -0
  142. re_common/libtest/mm3_test.py +32 -0
  143. re_common/libtest/mylogger_test.py +89 -0
  144. re_common/libtest/myparsel_test.py +28 -0
  145. re_common/libtest/mysql_test.py +151 -0
  146. re_common/libtest/pymongo_test.py +21 -0
  147. re_common/libtest/split_test.py +12 -0
  148. re_common/libtest/sqlite3_merge_test.py +6 -0
  149. re_common/libtest/sqlite3_test.py +34 -0
  150. re_common/libtest/tomlconfig_test.py +30 -0
  151. re_common/libtest/use_tools_test/__init__.py +3 -0
  152. re_common/libtest/user/__init__.py +5 -0
  153. re_common/studio/__init__.py +5 -0
  154. re_common/studio/assignment_expressions.py +37 -0
  155. re_common/studio/mydash/__init__.py +0 -0
  156. re_common/studio/mydash/test1.py +19 -0
  157. re_common/studio/pydashstudio/__init__.py +0 -0
  158. re_common/studio/pydashstudio/first.py +9 -0
  159. re_common/studio/streamlitstudio/__init__.py +0 -0
  160. re_common/studio/streamlitstudio/first_app.py +66 -0
  161. re_common/studio/streamlitstudio/uber_pickups.py +24 -0
  162. re_common/studio/test.py +19 -0
  163. re_common/vip/__init__.py +0 -0
  164. re_common/vip/base_step_process.py +11 -0
  165. re_common/vip/baseencodeid.py +91 -0
  166. re_common/vip/changetaskname.py +28 -0
  167. re_common/vip/core_var.py +24 -0
  168. re_common/vip/mmh3Hash.py +90 -0
  169. re_common/vip/proxy/__init__.py +0 -0
  170. re_common/vip/proxy/allproxys.py +127 -0
  171. re_common/vip/proxy/allproxys_thread.py +159 -0
  172. re_common/vip/proxy/cnki_proxy.py +153 -0
  173. re_common/vip/proxy/kuaidaili.py +87 -0
  174. re_common/vip/proxy/proxy_all.py +113 -0
  175. re_common/vip/proxy/update_kuaidaili_0.py +42 -0
  176. re_common/vip/proxy/wanfang_proxy.py +152 -0
  177. re_common/vip/proxy/wp_proxy_all.py +182 -0
  178. re_common/vip/read_rawid_to_txt.py +92 -0
  179. re_common/vip/title/__init__.py +5 -0
  180. re_common/vip/title/transform/TransformBookTitleToZt.py +125 -0
  181. re_common/vip/title/transform/TransformConferenceTitleToZt.py +139 -0
  182. re_common/vip/title/transform/TransformCstadTitleToZt.py +196 -0
  183. re_common/vip/title/transform/TransformJournalTitleToZt.py +203 -0
  184. re_common/vip/title/transform/TransformPatentTitleToZt.py +132 -0
  185. re_common/vip/title/transform/TransformRegulationTitleToZt.py +114 -0
  186. re_common/vip/title/transform/TransformStandardTitleToZt.py +135 -0
  187. re_common/vip/title/transform/TransformThesisTitleToZt.py +135 -0
  188. re_common/vip/title/transform/__init__.py +11 -0
  189. {re_common-2.0.1.dist-info → re_common-10.0.0.dist-info}/METADATA +1 -1
  190. re_common-10.0.0.dist-info/RECORD +213 -0
  191. re_common-2.0.1.dist-info/RECORD +0 -25
  192. {re_common-2.0.1.dist-info → re_common-10.0.0.dist-info}/LICENSE +0 -0
  193. {re_common-2.0.1.dist-info → re_common-10.0.0.dist-info}/WHEEL +0 -0
  194. {re_common-2.0.1.dist-info → re_common-10.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,182 @@
1
+ # -*- coding: utf-8 -*-
2
+ # @Time : 2020/6/23 14:38
3
+ # @Author : suhong
4
+ # @File : wp_proxy_all.py
5
+ # @Software: PyCharm
6
+
7
+ """
8
+ 云代理 开放代理 刷新验证进入redis (db3 wp_proxy_all)
9
+ """
10
+ import json
11
+ import random
12
+ import time
13
+
14
+ from re_common.baselibrary.utils.core.requests_core import USER_AGENTS
15
+
16
+ from re_common.facade.mysqlfacade import MysqlUtiles
17
+
18
+ from re_common.baselibrary.mthread.mythreading import ThreadInfo, ThreadVal, ThreadPoolManger
19
+
20
+ from re_common.baselibrary.mthread.MThreadingRun import MThreadingRun
21
+
22
+ from re_common.baselibrary.utils.baserequest import BaseRequest
23
+
24
+ from re_common.baselibrary.utils.myredisclient import MyRedis
25
+
26
+
27
class ProxyAll(object):
    """Access point for the open-proxy pool.

    Bundles the Redis client (config section ``wp_proxy``), an HTTP helper
    and a MySQL helper (config section ``db_proxy``), and knows how to pull
    a fresh batch of proxies from the ip3366 API.
    """

    def __init__(self, config="./db.ini"):
        """Create the Redis, HTTP and MySQL helpers.

        :param config: path of the ini file handed to ``MyRedis``.
        """
        self.config = config
        self.myredis = MyRedis(configpath=self.config, sesc='wp_proxy', is_conn_or_pipe=False)
        self.myredis.builder()

        user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko'
        self.Headers = {
            'Accept': '*/*',
            'User-Agent': user_agent,
        }
        self.UserAgent = user_agent
        self.baserequest = BaseRequest()
        self.ProxyPoolTotal = set()
        self.starttime = time.time()
        # NOTE(review): the MySQL helper always reads './db.ini', not ``config`` — confirm intended.
        self.mysqlutils_proxy = MysqlUtiles('./db.ini', "db_proxy")

    def get_redis_all(self):
        """Return whatever ``MyRedis.getDataFromRedis()`` yields for the configured key."""
        return self.myredis.getDataFromRedis()

    def get_proxy_ip3366(self, num=600):
        """Request ``num`` proxies from the ip3366 API.

        :param num: value substituted into the ``getnum`` query parameter.
        :return: ``(ok, pool)`` where ``pool`` is a set of ``"ip:port"``
            strings. ``ok`` is False when the request is reported as failed
            or any exception is raised; in the exception case ``pool`` holds
            whatever was parsed before the error.
        """
        pool = set()
        # An ``order=1`` query parameter was used previously and is now omitted.
        url = "http://gea.ip3366.net/api/?key=20200622135212736&getnum={}&formats=2&proxytype=01".format(num)
        try:
            ok, _err, resp = self.baserequest.base_request(url,
                                                           headers=self.Headers,
                                                           marks=['Ip'])
            if not ok:
                return False, pool
            for entry in json.loads(resp.text):
                pool.add(entry['Ip'] + ":" + str(entry['Port']))
            return True, pool
        except Exception as e:
            print(e)
            return False, pool
67
+
68
+
69
+ class YanzhengThreadRun(MThreadingRun):
+ # MThreadingRun subclass that validates proxies by requesting Baidu through
+ # each one and writes the working proxies to Redis and MySQL.
+ # NOTE(review): self.thread_pool, self.add_job, self.func, self.pipe and
+ # self.results are not defined in this class; presumably MThreadingRun
+ # provides them - confirm.
70
+ def __init__(self, num):
+ # num is forwarded unchanged to MThreadingRun.__init__.
71
+ super(YanzhengThreadRun, self).__init__(num)
72
+ self.pro = ProxyAll()
+ # NOTE(review): yz_right_set is initialised here but not used elsewhere in this class.
73
+ self.yz_right_set = set()
+ # Set by setTask; tells dealresult to delete the Redis key before re-adding results.
74
+ self.is_clean_redis = False
75
+
76
+
77
+ def getTask(self, *args, **kwargs):
+ # This runner supplies no tasks through getTask; it always returns an empty list.
78
+ return []
79
+
80
+ def setTask(self, results=None, *args, **kwargs):
+ # When the work queue is empty, combine the proxies currently in Redis with a
+ # fresh batch from the ip3366 API and queue one validation job per proxy.
81
+
82
+ if self.thread_pool.work_queue.is_empty():
83
+
84
+ redis_proxypool = self.pro.get_redis_all()
85
+ self.is_clean_redis = True
+ # If 5 seconds or less have passed since self.pro.starttime, sleep until
+ # about 6 seconds have passed before calling the API again.
86
+ if time.time() - self.pro.starttime <= 5:
87
+ time.sleep(6 - (time.time() - self.pro.starttime))
88
+ bools, proxypool = self.pro.get_proxy_ip3366()
89
+ result_pro = proxypool.union(redis_proxypool)
+ # Jobs are queued only when the API call was reported successful.
90
+ if bools:
91
+ for raw in result_pro:
+ # NOTE(review): this class defines `fun`, not `func`; presumably `func` is a
+ # wrapper supplied by MThreadingRun - confirm.
92
+ self.add_job(self.func,raw)
93
+ self.pro.starttime = time.time()
94
+
95
+
96
+ def dealresult(self, *args, **kwargs):
+ # Write self.results to the Redis set, record the update time, then upsert each
+ # proxy into MySQL.
97
+ if self.is_clean_redis:
98
+ # Clear the Redis key
99
+ self.pipe.delete(self.pro.myredis.RedisKey)
100
+ self.is_clean_redis = False
101
+
102
+ # Process the self.yz_right_set collection
103
+ print('Write DataBase %s ...' % self.pro.myredis.RedisKey)
+ # NOTE(review): if self.results is empty, sadd is called with no members -
+ # confirm the Redis client accepts that.
104
+ self.pipe.sadd(self.pro.myredis.RedisKey, *self.results)
105
+ curTime = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
106
+ self.pipe.hset('update_time', self.pro.myredis.RedisKey, curTime)
107
+ self.pipe.execute()
108
+ # Insert into MySQL
109
+ for raw in self.results:
110
+ self.deal_sql(raw)
111
+
112
+ @ThreadPoolManger.thread_lock
113
+ def deal_sql(self,raw):
114
+ # Insert into MySQL to count the proxies obtained each day
+ # NOTE(review): the statement is built with str.format from the proxy string;
+ # a value containing a quote would break or alter the SQL.
115
+ sql = "insert into gongwangproxy(proxy,cishu) Values('{}',1) on DUPLICATE key update cishu=cishu+1".format(raw)
116
+ self.pro.mysqlutils_proxy.ExeSqlToDB(sql)
117
+
118
+
119
+
120
+ def setProxy(self, proxysList=None):
+ # Intentionally a no-op in this runner.
121
+ pass
122
+
123
+ def is_break(self):
+ # Always returns False.
124
+ return False
125
+
126
+ def thread_pool_hook(self, threadinfo: ThreadInfo):
127
+ # Configure the proxy thread not to restart; by default it restarts
128
+ # if threadinfo.get_thread_name() == self.etn.proxythreadname:
129
+ # threadinfo.set_is_restart(True)
130
+ # if threadinfo.get_thread_name() == self.etn.taskthreadname:
131
+ # threadinfo.set_is_restart(False)
+ # The restart configuration above is disabled; the hook just returns an empty dict.
132
+ return {}
133
+
134
+ def fun(self, threadval: ThreadVal, *args, **kwargs):
135
+ """
136
+ Validate that the proxy works by requesting Baidu through it.
+
+ args[0] is the proxy string; it is used for both the http and https
+ entries of the proxies mapping. When the request is reported successful
+ the proxy string is put on the result queue obtained from threadval.
+ Exceptions are printed and otherwise ignored.
137
+ """
138
+ raw = args[0]
139
+ result_queue = threadval.get_result_queue()
140
+ ppp = {
141
+ 'http': raw,
142
+ 'https': raw
143
+ }
144
+ try:
145
+ url = "https://www.baidu.com/"
146
+ bools, e, r = self.pro.baserequest.base_request(url,
147
+ headers=self.pro.Headers,
148
+ proxies=ppp,
149
+ timeout=5,
150
+ marks=['百度一下,你就知道'])
151
+ if bools:
152
+ result_queue.put(raw)
153
+ except Exception as e:
154
+ print(e)
155
+
156
+ # Validate against Chaoxing journals
157
+ # yzurl = "http://qikan.chaoxing.com/mag/infos?mags=ea15bb11cfca2424ae72402ca8461604"
158
+ # mags = "ea15bb11cfca2424ae72402ca8461604"
159
+ # HEADER = {
160
+ # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
161
+ # 'User-Agent': random.choice(USER_AGENTS),
162
+ # 'Accept-Encoding': 'gzip, deflate',
163
+ # 'Referer': 'http://qikan.chaoxing.com',
164
+ # }
165
+ # HEADER['Referer'] = 'http://qikan.chaoxing.com/mag/infos?mags=' + mags
166
+ # try:
167
+ # BoolResult, errString, r = self.pro.baserequest.base_request(url=yzurl,
168
+ # headers=HEADER,
169
+ # timeout=(5, 10),
170
+ # proxies=ppp,
171
+ # marks=['Fbookright fl'])
172
+ # if BoolResult:
173
+ # result_queue.put(raw)
174
+ # except Exception as e:
175
+ # print(e)
176
+
177
+
178
if __name__ == '__main__':
    # For manual inspection of the Redis pool: ProxyAll().get_redis_all()
    # 40 is forwarded to MThreadingRun as ``num`` (presumably the thread count).
    runner = YanzhengThreadRun(40)
    runner.run()
@@ -0,0 +1,92 @@
1
+ from re_common.baselibrary.utils.basefile import BaseFile
2
+
3
+ from re_common.facade.mysqlfacade import MysqlUtiles
4
+
5
+
6
def get_rawid_from_mysql():
    """Dump the ``filename`` column of table ``article`` to a text file.

    Rows are fetched page by page with ``LIMIT offset,count`` until a page
    comes back empty, and written one value per line.

    The original opened the output file with mode ``'w'`` for every page, so
    each page overwrote the previous one and only the last page survived.
    Now the first page truncates the file and later pages append. As before,
    no file is created when the table returns no rows.
    """
    from re_common.baselibrary.tools.stringtodicts import StringToDicts

    # NOTE(review): credentials are embedded in source - consider moving them to config.
    strings = """
    host = 192.168.31.209
    user = root
    passwd = vipdatacenter
    db = cnki_qk
    port = 3306
    chartset = utf8
    """
    mysqlrawid = r"F:\fun2\mysqlrawid.txt"
    feilds = "filename"

    dicts = StringToDicts().string_to_dicts_by_equal(strings)
    mysqlutils = MysqlUtiles("", "", builder="MysqlBuilderForDicts", dicts=dicts)
    offset = 0
    limit = 1000000
    while True:
        # NOTE(review): the query has no ORDER BY, so page boundaries rely on
        # the server returning rows in a stable order - confirm.
        _ok, raws = mysqlutils.SelectFromDB(
            "select `{}` from article limit {},{}".format(feilds, offset, limit))
        if len(raws) == 0:
            break
        # Truncate on the first page only; append on every later page.
        mode = 'w' if offset == 0 else 'a'
        with open(mysqlrawid, mode, encoding="utf-8") as f:
            f.writelines(row[0] + "\n" for row in raws)
        offset += limit
34
+
35
+
36
+ # get_rawid_from_mysql()
37
+ # print(BaseFile.get_file_line_num(r'C:\Users\xuzhu\Downloads\part-r-00000'))
38
+
39
def read_files_compose():
    """
    Compare two text files and write the difference to a third.

    Every value yielded for ``mysqlrawid.txt`` that is not also yielded for
    ``part-r-00000`` is written to ``cha.txt``, one per line.
    :return: None
    """
    reference_path = r'C:\Users\xuzhu\Downloads\part-r-00000'
    mysql_path = r'F:\fun2\mysqlrawid.txt'
    cha = r"F:\fun2\cha.txt"

    # Read the reference file first, then the MySQL dump (same order as before).
    reference_ids = set(BaseFile.read_file_r_mode_yield(reference_path))
    mysql_ids = set(BaseFile.read_file_r_mode_yield(mysql_path))

    only_in_mysql = mysql_ids - reference_ids
    with open(cha, 'w', encoding="utf-8") as out:
        for rawid in only_in_mysql:
            out.write(rawid + "\n")
56
+
57
+
58
+ #read_files_compose()
59
+
60
def set_stat():
    """Reset ``stat`` and ``ref_stat`` to 0 for every filename listed in cha.txt.

    Filenames are read from the file and sent to MySQL in batches: a batch is
    flushed as soon as it holds more than 100000 names, and any remainder is
    flushed at the end.

    The original formatted ``tuple(lists)`` straight into the statement and,
    when exactly one name was left, appended the literal ``"test"`` to avoid
    the trailing comma of a 1-tuple. That also reset any row whose filename
    is ``test``. The IN list is now built explicitly, so a single name
    produces valid SQL without padding.
    """
    from re_common.baselibrary.tools.stringtodicts import StringToDicts

    # NOTE(review): credentials are embedded in source - consider moving them to config.
    strings = """
    host = 192.168.31.209
    user = root
    passwd = vipdatacenter
    db = cnki_qk
    port = 3306
    chartset = utf8
    """
    charawid = r"F:\fun2\cha.txt"

    dicts = StringToDicts().string_to_dicts_by_equal(strings)
    mysqlutils = MysqlUtiles("", "", builder="MysqlBuilderForDicts", dicts=dicts)

    def flush(batch):
        # repr() of each name joined by ", " gives exactly the text that
        # formatting a tuple of two or more names produced, minus the
        # trailing comma a 1-tuple would add.
        # NOTE(review): values are still interpolated into the SQL text; use
        # bound parameters if ExeSqlToDB supports them.
        in_list = ", ".join(repr(name) for name in batch)
        sql = "update article set stat=0,ref_stat=0 where filename in ({})".format(in_list)
        mysqlutils.ExeSqlToDB(sql)
        batch.clear()

    lists = []
    for values in BaseFile.read_file_r_mode_yield(charawid):
        lists.append(values)
        if len(lists) > 100000:
            flush(lists)

    if lists:
        flush(lists)
91
+
92
+ set_stat()
@@ -0,0 +1,5 @@
1
+ # -*- coding:utf-8 -*-
2
+ # @Time : 2021/12/2 9:38
3
+ # @Author: suhong
4
+ # @File : __init__.py
5
+ # @Function :
@@ -0,0 +1,125 @@
1
+ # -*- coding:utf-8 -*-
2
+ # @Time : 2021/12/19 18:16
3
+ # @Author: suhong
4
+ # @File : TransformBookTitleToZt.py
5
+ # @Function : 图书a层转智图
6
+ from re_common.baselibrary.utils.basetime import BaseTime
7
+
8
+ from re_common.facade.mysqlfacade import MysqlUtiles
9
+
10
+
11
class TransformBookTitleToZt():
    """Convert an a-layer book title record into the Zhitu ("zt") field layout."""

    # (zt field, a-layer key) pairs copied as-is, in output order, ahead of
    # the provider fields.
    _DIRECT_FIELDS = (
        ("lngid", "lngid"),
        ("rawid", "rawid"),
        ("title", "title"),
        ("title_alternative", "title_alt"),
        ("title_series", "title_series"),
        ("title_edition", "revision"),
        ("identifier_eisbn", "eisbn"),
        ("identifier_pisbn", "isbn"),
        ("identifier_doi", "doi"),
        ("creator", "author"),
        ("creator_en", "author_alt"),
        ("creator_institution", "organ"),
        ("creator_bio", "author_intro"),
        ("publisher", "publisher"),
        ("date", "pub_year"),
        ("description", "abstract"),
        ("description_en", "abstract_alt"),
        ("description_unit", "catalog"),
        ("subject", "keyword"),
        ("subject_en", "keyword_alt"),
        ("subject_clc", "clc_no"),
        ("subject_esc", "subject_edu"),
        ("page", "page_info"),
        ("beginpage", "begin_page"),
        ("endpage", "end_page"),
        ("pagecount", "page_cnt"),
        ("date_created", "pub_date"),
        ("rawtype", "raw_type"),
        ("folio_size", "book_size"),
        ("price", "price"),
        ("pub_place", "pub_place"),
    )
    # Same kind of pairs, emitted after the provider/batch/type/medium fields.
    _TRAILING_FIELDS = (
        ("country", "country"),
        ("language", "language"),
        ("is_deprecated", "is_deprecated"),
    )

    def __init__(self):
        """Set up the output field list, connect to MySQL and load the provider map."""
        # Every key that must be present in a transformed record.
        self.fields = [
            "lngid",
            "rawid",
            "title",
            "title_alternative",
            "title_sub",
            "title_edition",
            "title_series",
            "identifier_eisbn",
            "identifier_pisbn",
            "identifier_doi",
            "creator",
            "creator_en",
            "creator_bio",
            "creator_institution",
            "publisher",
            "date",
            "description",
            "description_en",
            "description_unit",
            "subject",
            "subject_en",
            "subject_clc",
            "subject_esc",
            "page",
            "beginpage",
            "endpage",
            "pagecount",
            "date_created",
            "rawtype",
            "folio_size",
            "price",
            "pub_place",
            "language",
            "country",
            "type",
            "provider",
            "provider_url",
            "provider_id",
            "medium",
            "batch",
            "is_deprecated"
        ]
        # sub_db_id -> provider, filled by get_zt_provider().
        self.zt_providermap = dict()
        # NOTE(review): credentials are embedded in source - consider moving them to config.
        self.mysqlutils = MysqlUtiles(None, None, builder="MysqlBuilderForDicts", dicts={
            "host": "192.168.31.24",
            "user": "root",
            "passwd": "vipdatacenter",
            "db": "data_warehouse_sql",
            "port": "3306",
            "chartset": "utf8mb4",
        })
        self.get_zt_provider()

    def get_zt_provider(self):
        """Load the ``sub_db_id -> provider`` map for source_type '1', out_type 'zt'."""
        result = self.mysqlutils.SelectFromDB(
            "select sub_db_id,provider from a_transform_task where source_type = '1' and out_type = 'zt'")
        # Index 1 of the result holds the rows; each row is (sub_db_id, provider).
        for row in result[1]:
            self.zt_providermap[row[0]] = row[1]

    def transform(self, titleMap):
        """Build the zt record for one a-layer book record.

        :param titleMap: mapping of a-layer field names to values.
        :return: dict containing every name in ``self.fields``; fields with no
            source value are ``""``.
        :raises KeyError: when ``titleMap["sub_db_id"]`` has no entry in the
            provider map.

        ``provider_url`` and ``rawid`` are now read with a ``""`` default like
        every other field, so a record missing either key no longer raises
        ``TypeError`` on the string concatenation.
        """
        sub_db_id = titleMap.get("sub_db_id", "")
        transMap = {zt_key: titleMap.get(src_key, "") for zt_key, src_key in self._DIRECT_FIELDS}

        # The provider always comes from the task table, never from the record itself.
        provider = self.zt_providermap[sub_db_id]
        transMap["provider"] = provider
        transMap["provider_url"] = provider + "@" + titleMap.get("provider_url", "")
        transMap["provider_id"] = provider + "@" + titleMap.get("rawid", "")
        transMap["batch"] = BaseTime().get_beijin_date_strins("%Y%m%d") + "00"
        transMap["type"] = titleMap.get("source_type", "")
        transMap["medium"] = "2"
        for zt_key, src_key in self._TRAILING_FIELDS:
            transMap[zt_key] = titleMap.get(src_key, "")

        # Guarantee the full schema, e.g. title_sub has no source field.
        for field in self.fields:
            transMap.setdefault(field, "")
        return transMap
123
+
124
if __name__ == '__main__':
    # Constructing the transformer connects to MySQL and loads the provider map.
    t = TransformBookTitleToZt()
@@ -0,0 +1,139 @@
1
+ # -*- coding:utf-8 -*-
2
+ # @Time : 2021/12/15 14:09
3
+ # @Author: suhong
4
+ # @File : TransformConferenceTitleToZt.py
5
+ # @Function :转换会议a层到智图代码
6
+ from re_common.baselibrary.utils.basetime import BaseTime
7
+
8
+ from re_common.facade.mysqlfacade import MysqlUtiles
9
+
10
+
11
class TransformConferenceTitleToZt():
    """Convert an a-layer conference title record into the Zhitu ("zt") field layout."""

    # (zt field, a-layer key) pairs copied as-is, in output order, ahead of
    # the provider fields.
    _DIRECT_FIELDS = (
        ("lngid", "lngid"),
        ("rawid", "rawid"),
        ("title", "title"),
        ("title_alternative", "title_alt"),
        ("title_series", "meeting_record_name"),
        ("title_edition", "revision"),
        ("applicant", "applicant"),
        ("identifier_doi", "doi"),
        ("creator", "author"),
        ("creator_en", "author_alt"),
        ("creator_institution", "organ"),
        ("creator_bio", "author_intro"),
        ("creator_drafting", "society"),
        ("creator_release", "host_organ"),
        ("source", "meeting_name"),
        ("source_en", "meeting_name_alt"),
        ("source_institution", "meeting_place"),
        ("publisher", "publisher"),
        ("date", "pub_year"),
        ("description", "abstract"),
        ("description_en", "abstract_alt"),
        ("description_fund", "fund"),
        ("subject", "keyword"),
        ("subject_en", "keyword_alt"),
        ("page", "page_info"),
        ("beginpage", "begin_page"),
        ("endpage", "end_page"),
        ("jumppage", "jump_page"),
        ("pagecount", "page_cnt"),
        ("subject_clc", "clc_no"),
        ("subject_esc", "subject_edu"),
        ("date_created", "pub_date"),
        ("pub_place", "pub_place"),
    )

    def __init__(self):
        """Set up the output field list, connect to MySQL and load the provider map."""
        # Every key that must be present in a transformed record.
        self.fields = [
            "lngid",
            "rawid",
            "title",
            "title_alternative",
            "title_series",
            "title_edition",
            "applicant",
            "identifier_doi",
            "creator",
            "creator_en",
            "creator_institution",
            "creator_bio",
            "creator_drafting",
            "creator_release",
            "source",
            "source_en",
            "source_institution",
            "publisher",
            "date",
            "description",
            "description_en",
            "description_fund",
            "subject",
            "subject_en",
            "page",
            "beginpage",
            "endpage",
            "jumppage",
            "pagecount",
            "subject_clc",
            "subject_esc",
            "date_created",
            "pub_place",
            "language",
            "country",
            "type",
            "provider",
            "provider_url",
            "provider_id",
            "medium",
            "batch",
            "is_deprecated",
            "if_pdf_fulltext",
            "if_html_fulltext"
        ]
        # sub_db_id -> provider, filled by get_zt_provider().
        self.zt_providermap = dict()
        # NOTE(review): credentials are embedded in source - consider moving them to config.
        self.mysqlutils = MysqlUtiles(None, None, builder="MysqlBuilderForDicts", dicts={
            "host": "192.168.31.24",
            "user": "root",
            "passwd": "vipdatacenter",
            "db": "data_warehouse_sql",
            "port": "3306",
            "chartset": "utf8mb4",
        })
        self.get_zt_provider()

    def get_zt_provider(self):
        """Load the ``sub_db_id -> provider`` map for source_type '6', out_type 'zt'."""
        result = self.mysqlutils.SelectFromDB(
            "select sub_db_id,provider from a_transform_task where source_type = '6' and out_type = 'zt'")
        # Index 1 of the result holds the rows; each row is (sub_db_id, provider).
        for row in result[1]:
            self.zt_providermap[row[0]] = row[1]

    def transform(self, titleMap):
        """Build the zt record for one a-layer conference record.

        :param titleMap: mapping of a-layer field names to values.
        :return: dict containing every name in ``self.fields``; fields with no
            source value are ``""``. ``if_pdf_fulltext`` and
            ``if_html_fulltext`` are ``"1"`` when ``fulltext_type`` contains
            ``pdf`` / ``html`` respectively, otherwise ``"0"``.
        :raises KeyError: when ``titleMap["sub_db_id"]`` has no entry in the
            provider map.

        ``provider_url`` and ``rawid`` are now read with a ``""`` default like
        every other field, so a record missing either key no longer raises
        ``TypeError`` on the string concatenation.
        """
        sub_db_id = titleMap.get("sub_db_id", "")
        transMap = {zt_key: titleMap.get(src_key, "") for zt_key, src_key in self._DIRECT_FIELDS}

        # The provider always comes from the task table, never from the record itself.
        provider = self.zt_providermap[sub_db_id]
        transMap["provider"] = provider
        transMap["provider_url"] = provider + "@" + titleMap.get("provider_url", "")
        transMap["provider_id"] = provider + "@" + titleMap.get("rawid", "")
        transMap["batch"] = BaseTime().get_beijin_date_strins("%Y%m%d") + "00"
        transMap["type"] = titleMap.get("source_type", "")
        transMap["medium"] = "2"
        transMap["country"] = titleMap.get("country", "")
        transMap["language"] = titleMap.get("language", "")

        fulltext_type = titleMap.get("fulltext_type", "")
        transMap["if_pdf_fulltext"] = "1" if "pdf" in fulltext_type else "0"
        transMap["if_html_fulltext"] = "1" if "html" in fulltext_type else "0"
        transMap["is_deprecated"] = titleMap.get("is_deprecated", "")

        # Guarantee the full schema even for fields with no mapping above.
        for field in self.fields:
            transMap.setdefault(field, "")
        return transMap
136
+
137
+
138
if __name__ == '__main__':
    # Constructing the transformer connects to MySQL and loads the provider map.
    t = TransformConferenceTitleToZt()