re-common 10.0.22__py3-none-any.whl → 10.0.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202)
  1. re_common/baselibrary/__init__.py +4 -4
  2. re_common/baselibrary/baseabs/__init__.py +6 -6
  3. re_common/baselibrary/baseabs/baseabs.py +26 -26
  4. re_common/baselibrary/database/mbuilder.py +132 -132
  5. re_common/baselibrary/database/moudle.py +93 -93
  6. re_common/baselibrary/database/msqlite3.py +194 -194
  7. re_common/baselibrary/database/mysql.py +169 -169
  8. re_common/baselibrary/database/sql_factory.py +26 -26
  9. re_common/baselibrary/mthread/MThreadingRun.py +486 -486
  10. re_common/baselibrary/mthread/MThreadingRunEvent.py +349 -349
  11. re_common/baselibrary/mthread/__init__.py +2 -2
  12. re_common/baselibrary/mthread/mythreading.py +695 -695
  13. re_common/baselibrary/pakge_other/socks.py +404 -404
  14. re_common/baselibrary/readconfig/config_factory.py +18 -18
  15. re_common/baselibrary/readconfig/ini_config.py +317 -317
  16. re_common/baselibrary/readconfig/toml_config.py +49 -49
  17. re_common/baselibrary/temporary/envdata.py +36 -36
  18. re_common/baselibrary/tools/all_requests/aiohttp_request.py +118 -118
  19. re_common/baselibrary/tools/all_requests/httpx_requet.py +102 -102
  20. re_common/baselibrary/tools/all_requests/mrequest.py +412 -412
  21. re_common/baselibrary/tools/all_requests/requests_request.py +81 -81
  22. re_common/baselibrary/tools/batch_compre/bijiao_batch.py +31 -31
  23. re_common/baselibrary/tools/contrast_db3.py +123 -123
  24. re_common/baselibrary/tools/copy_file.py +39 -39
  25. re_common/baselibrary/tools/db3_2_sizedb3.py +102 -102
  26. re_common/baselibrary/tools/foreachgz.py +39 -39
  27. re_common/baselibrary/tools/get_attr.py +10 -10
  28. re_common/baselibrary/tools/image_to_pdf.py +61 -61
  29. re_common/baselibrary/tools/java_code_deal.py +139 -139
  30. re_common/baselibrary/tools/javacode.py +79 -79
  31. re_common/baselibrary/tools/mdb_db3.py +48 -48
  32. re_common/baselibrary/tools/merge_file.py +171 -171
  33. re_common/baselibrary/tools/merge_gz_file.py +165 -165
  34. re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +42 -42
  35. re_common/baselibrary/tools/mhdfstools/hdfst.py +42 -42
  36. re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +38 -38
  37. re_common/baselibrary/tools/mongo_tools.py +50 -50
  38. re_common/baselibrary/tools/move_file.py +170 -170
  39. re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +63 -63
  40. re_common/baselibrary/tools/move_mongo/move_mongo_table.py +354 -354
  41. re_common/baselibrary/tools/move_mongo/use_mttf.py +18 -18
  42. re_common/baselibrary/tools/move_mongo/use_mv.py +93 -93
  43. re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +125 -125
  44. re_common/baselibrary/tools/mpandas/pandas_visualization.py +7 -7
  45. re_common/baselibrary/tools/myparsel.py +104 -104
  46. re_common/baselibrary/tools/rename_dir_file.py +37 -37
  47. re_common/baselibrary/tools/sequoiadb_utils.py +398 -398
  48. re_common/baselibrary/tools/split_line_to_many.py +25 -25
  49. re_common/baselibrary/tools/stringtodicts.py +33 -33
  50. re_common/baselibrary/tools/workwechant_bot.py +84 -84
  51. re_common/baselibrary/utils/baseaiohttp.py +296 -296
  52. re_common/baselibrary/utils/baseaiomysql.py +87 -87
  53. re_common/baselibrary/utils/baseallstep.py +191 -191
  54. re_common/baselibrary/utils/baseavro.py +19 -19
  55. re_common/baselibrary/utils/baseboto3.py +291 -291
  56. re_common/baselibrary/utils/basecsv.py +32 -32
  57. re_common/baselibrary/utils/basedict.py +133 -133
  58. re_common/baselibrary/utils/basedir.py +241 -241
  59. re_common/baselibrary/utils/baseencode.py +351 -351
  60. re_common/baselibrary/utils/baseencoding.py +28 -28
  61. re_common/baselibrary/utils/baseesdsl.py +86 -86
  62. re_common/baselibrary/utils/baseexcel.py +264 -264
  63. re_common/baselibrary/utils/baseexcept.py +109 -109
  64. re_common/baselibrary/utils/basefile.py +654 -654
  65. re_common/baselibrary/utils/baseftp.py +214 -214
  66. re_common/baselibrary/utils/basegzip.py +60 -60
  67. re_common/baselibrary/utils/basehdfs.py +135 -135
  68. re_common/baselibrary/utils/basehttpx.py +268 -268
  69. re_common/baselibrary/utils/baseip.py +87 -87
  70. re_common/baselibrary/utils/basejson.py +2 -2
  71. re_common/baselibrary/utils/baselist.py +32 -32
  72. re_common/baselibrary/utils/basemotor.py +190 -190
  73. re_common/baselibrary/utils/basemssql.py +98 -98
  74. re_common/baselibrary/utils/baseodbc.py +113 -113
  75. re_common/baselibrary/utils/basepandas.py +302 -302
  76. re_common/baselibrary/utils/basepeewee.py +11 -11
  77. re_common/baselibrary/utils/basepika.py +180 -180
  78. re_common/baselibrary/utils/basepydash.py +143 -143
  79. re_common/baselibrary/utils/basepymongo.py +230 -230
  80. re_common/baselibrary/utils/basequeue.py +22 -22
  81. re_common/baselibrary/utils/baserar.py +57 -57
  82. re_common/baselibrary/utils/baserequest.py +279 -279
  83. re_common/baselibrary/utils/baseset.py +8 -8
  84. re_common/baselibrary/utils/basesmb.py +403 -403
  85. re_common/baselibrary/utils/basestring.py +382 -382
  86. re_common/baselibrary/utils/basetime.py +320 -320
  87. re_common/baselibrary/utils/baseurl.py +121 -121
  88. re_common/baselibrary/utils/basezip.py +57 -57
  89. re_common/baselibrary/utils/core/__init__.py +7 -7
  90. re_common/baselibrary/utils/core/bottomutils.py +18 -18
  91. re_common/baselibrary/utils/core/mdeprecated.py +327 -327
  92. re_common/baselibrary/utils/core/mlamada.py +16 -16
  93. re_common/baselibrary/utils/core/msginfo.py +25 -25
  94. re_common/baselibrary/utils/core/requests_core.py +103 -103
  95. re_common/baselibrary/utils/fateadm.py +429 -429
  96. re_common/baselibrary/utils/importfun.py +123 -123
  97. re_common/baselibrary/utils/mfaker.py +57 -57
  98. re_common/baselibrary/utils/my_abc/__init__.py +3 -3
  99. re_common/baselibrary/utils/my_abc/better_abc.py +32 -32
  100. re_common/baselibrary/utils/mylogger.py +414 -414
  101. re_common/baselibrary/utils/myredisclient.py +861 -861
  102. re_common/baselibrary/utils/pipupgrade.py +21 -21
  103. re_common/baselibrary/utils/ringlist.py +85 -85
  104. re_common/baselibrary/utils/version_compare.py +36 -36
  105. re_common/baselibrary/utils/ydmhttp.py +126 -126
  106. re_common/facade/lazy_import.py +11 -11
  107. re_common/facade/loggerfacade.py +25 -25
  108. re_common/facade/mysqlfacade.py +467 -467
  109. re_common/facade/now.py +31 -31
  110. re_common/facade/sqlite3facade.py +257 -257
  111. re_common/facade/use/mq_use_facade.py +83 -83
  112. re_common/facade/use/proxy_use_facade.py +19 -19
  113. re_common/libtest/base_dict_test.py +19 -19
  114. re_common/libtest/baseavro_test.py +13 -13
  115. re_common/libtest/basefile_test.py +14 -14
  116. re_common/libtest/basemssql_test.py +77 -77
  117. re_common/libtest/baseodbc_test.py +7 -7
  118. re_common/libtest/basepandas_test.py +38 -38
  119. re_common/libtest/get_attr_test/get_attr_test_settings.py +14 -14
  120. re_common/libtest/get_attr_test/settings.py +54 -54
  121. re_common/libtest/idencode_test.py +53 -53
  122. re_common/libtest/iniconfig_test.py +35 -35
  123. re_common/libtest/ip_test.py +34 -34
  124. re_common/libtest/merge_file_test.py +20 -20
  125. re_common/libtest/mfaker_test.py +8 -8
  126. re_common/libtest/mm3_test.py +31 -31
  127. re_common/libtest/mylogger_test.py +88 -88
  128. re_common/libtest/myparsel_test.py +27 -27
  129. re_common/libtest/mysql_test.py +151 -151
  130. re_common/libtest/pymongo_test.py +21 -21
  131. re_common/libtest/split_test.py +11 -11
  132. re_common/libtest/sqlite3_merge_test.py +5 -5
  133. re_common/libtest/sqlite3_test.py +34 -34
  134. re_common/libtest/tomlconfig_test.py +30 -30
  135. re_common/libtest/use_tools_test/__init__.py +2 -2
  136. re_common/libtest/user/__init__.py +4 -4
  137. re_common/studio/__init__.py +4 -4
  138. re_common/studio/assignment_expressions.py +36 -36
  139. re_common/studio/mydash/test1.py +18 -18
  140. re_common/studio/pydashstudio/first.py +9 -9
  141. re_common/studio/streamlitstudio/first_app.py +65 -65
  142. re_common/studio/streamlitstudio/uber_pickups.py +23 -23
  143. re_common/studio/test.py +18 -18
  144. re_common/v2/baselibrary/business_utils/BusinessStringUtil.py +195 -0
  145. re_common/v2/baselibrary/business_utils/__init__.py +0 -0
  146. re_common/v2/baselibrary/business_utils/rel_tools.py +6 -0
  147. re_common/v2/baselibrary/decorators/utils.py +59 -59
  148. re_common/v2/baselibrary/s3object/baseboto3.py +230 -230
  149. re_common/v2/baselibrary/tools/WeChatRobot.py +95 -79
  150. re_common/v2/baselibrary/tools/ac_ahocorasick.py +75 -75
  151. re_common/v2/baselibrary/tools/dict_tools.py +37 -37
  152. re_common/v2/baselibrary/tools/dolphinscheduler.py +187 -187
  153. re_common/v2/baselibrary/tools/hdfs_data_processer.py +338 -338
  154. re_common/v2/baselibrary/tools/list_tools.py +65 -65
  155. re_common/v2/baselibrary/tools/search_hash_tools.py +54 -54
  156. re_common/v2/baselibrary/tools/text_matcher.py +326 -326
  157. re_common/v2/baselibrary/tools/unionfind_tools.py +60 -60
  158. re_common/v2/baselibrary/utils/BusinessStringUtil.py +196 -196
  159. re_common/v2/baselibrary/utils/author_smi.py +360 -360
  160. re_common/v2/baselibrary/utils/base_string_similarity.py +158 -158
  161. re_common/v2/baselibrary/utils/basedict.py +37 -37
  162. re_common/v2/baselibrary/utils/basehdfs.py +161 -161
  163. re_common/v2/baselibrary/utils/basepika.py +180 -180
  164. re_common/v2/baselibrary/utils/basetime.py +77 -77
  165. re_common/v2/baselibrary/utils/db.py +38 -38
  166. re_common/v2/baselibrary/utils/json_cls.py +16 -16
  167. re_common/v2/baselibrary/utils/mq.py +83 -83
  168. re_common/v2/baselibrary/utils/n_ary_expression_tree.py +243 -243
  169. re_common/v2/baselibrary/utils/string_bool.py +186 -149
  170. re_common/v2/baselibrary/utils/string_clear.py +227 -204
  171. re_common/v2/baselibrary/utils/string_smi.py +18 -18
  172. re_common/v2/baselibrary/utils/stringutils.py +213 -213
  173. re_common/vip/base_step_process.py +11 -11
  174. re_common/vip/baseencodeid.py +90 -90
  175. re_common/vip/changetaskname.py +28 -28
  176. re_common/vip/core_var.py +24 -24
  177. re_common/vip/mmh3Hash.py +89 -89
  178. re_common/vip/proxy/allproxys.py +127 -127
  179. re_common/vip/proxy/allproxys_thread.py +159 -159
  180. re_common/vip/proxy/cnki_proxy.py +153 -153
  181. re_common/vip/proxy/kuaidaili.py +87 -87
  182. re_common/vip/proxy/proxy_all.py +113 -113
  183. re_common/vip/proxy/update_kuaidaili_0.py +42 -42
  184. re_common/vip/proxy/wanfang_proxy.py +152 -152
  185. re_common/vip/proxy/wp_proxy_all.py +181 -181
  186. re_common/vip/read_rawid_to_txt.py +91 -91
  187. re_common/vip/title/__init__.py +5 -5
  188. re_common/vip/title/transform/TransformBookTitleToZt.py +125 -125
  189. re_common/vip/title/transform/TransformConferenceTitleToZt.py +139 -139
  190. re_common/vip/title/transform/TransformCstadTitleToZt.py +195 -195
  191. re_common/vip/title/transform/TransformJournalTitleToZt.py +203 -203
  192. re_common/vip/title/transform/TransformPatentTitleToZt.py +132 -132
  193. re_common/vip/title/transform/TransformRegulationTitleToZt.py +114 -114
  194. re_common/vip/title/transform/TransformStandardTitleToZt.py +135 -135
  195. re_common/vip/title/transform/TransformThesisTitleToZt.py +135 -135
  196. re_common/vip/title/transform/__init__.py +10 -10
  197. {re_common-10.0.22.dist-info → re_common-10.0.24.dist-info}/LICENSE +201 -201
  198. {re_common-10.0.22.dist-info → re_common-10.0.24.dist-info}/METADATA +16 -16
  199. re_common-10.0.24.dist-info/RECORD +230 -0
  200. {re_common-10.0.22.dist-info → re_common-10.0.24.dist-info}/WHEEL +1 -1
  201. re_common-10.0.22.dist-info/RECORD +0 -227
  202. {re_common-10.0.22.dist-info → re_common-10.0.24.dist-info}/top_level.txt +0 -0
@@ -1,135 +1,135 @@
1
- # -*- coding:utf-8 -*-
2
- # @Time : 2021/12/21 13:47
3
- # @Author: suhong
4
- # @File : TransformStandardTitleToZt.py
5
- # @Function : 标准a层转智图
6
- from re_common.baselibrary.utils.basetime import BaseTime
7
-
8
- from re_common.facade.mysqlfacade import MysqlUtiles
9
-
10
-
11
- class TransformStandardTitleToZt():
12
- def __init__(self):
13
- self.fields = [
14
- "lngid",
15
- "rawid",
16
- "title",
17
- "title_alternative",
18
- "identifier_standard",
19
- "identifier_doi",
20
- "creator",
21
- "creator_en",
22
- "creator_institution",
23
- "creator_release",
24
- "date",
25
- "date_impl",
26
- "date_created",
27
- "description",
28
- "description_en",
29
- "description_type",
30
- "subject",
31
- "subject_en",
32
- "page",
33
- "subject_clc",
34
- "subject_esc",
35
- "subject_csc",
36
- "subject_isc",
37
- "legal_status",
38
- "language",
39
- "country",
40
- "type",
41
- "provider",
42
- "provider_url",
43
- "provider_id",
44
- "medium",
45
- "batch",
46
- "is_deprecated"
47
- ]
48
- self.zt_providermap = dict()
49
- self.mysqlutils = MysqlUtiles(None, None, builder="MysqlBuilderForDicts", dicts={
50
- "host": "192.168.31.24",
51
- "user": "root",
52
- "passwd": "vipdatacenter",
53
- "db": "data_warehouse_sql",
54
- "port": "3306",
55
- "chartset": "utf8mb4",
56
- })
57
- self.get_zt_provider()
58
-
59
- def get_zt_provider(self):
60
- rows = self.mysqlutils.SelectFromDB(
61
- "select sub_db_id,provider from a_transform_task where source_type = '5' and out_type = 'zt'")
62
- for row in rows[1]:
63
- self.zt_providermap[row[0]] = row[1]
64
-
65
- def transform(self, titleMap):
66
- transMap = dict()
67
- sub_db_id = titleMap.get("sub_db_id", "")
68
- transMap["lngid"] = titleMap.get("lngid", "")
69
- transMap["rawid"] = titleMap.get("rawid", "")
70
- transMap["title"] = titleMap.get("title", "")
71
- transMap["title_alternative"] = titleMap.get("title_alt", "")
72
- transMap["identifier_standard"] = titleMap.get("std_no", "")
73
- transMap["identifier_doi"] = titleMap.get("doi", "")
74
- transMap["creator"] = titleMap.get("author", "")
75
- transMap["creator_en"] = titleMap.get("author_alt", "")
76
- transMap["creator_institution"] = titleMap.get("organ", "")
77
- transMap["creator_release"] = titleMap.get("publisher", "")
78
- transMap["date"] = titleMap.get("pub_year", "")
79
- transMap["date_impl"] = titleMap.get("impl_date", "")
80
- transMap["date_created"] = titleMap.get("pub_date", "")
81
- transMap["description"] = titleMap.get("abstract", "")
82
- transMap["description_en"] = titleMap.get("abstract_alt", "")
83
- transMap["description_type"] = titleMap.get("raw_type", "")
84
- transMap["subject"] = titleMap.get("keyword", "")
85
- transMap["subject_en"] = titleMap.get("keyword_alt", "")
86
- transMap["page"] = titleMap.get("page_info", "")
87
- transMap["pagecount"] = titleMap.get("page_cnt", "")
88
- transMap["subject_clc"] = titleMap.get("clc_no", "")
89
- transMap["subject_esc"] = titleMap.get("subject_edu", "")
90
- transMap["subject_csc"] = titleMap.get("ccs_no", "")
91
- transMap["subject_isc"] = titleMap.get("ics_no", "")
92
- transMap["legal_status"] = titleMap.get("legal_status", "")
93
- # transMap["provider"] = titleMap.get("zt_provider", "")
94
- # if transMap["provider"] == "":
95
- transMap["provider"] = self.zt_providermap[sub_db_id]
96
- transMap["provider_url"] = transMap["provider"] + "@" + titleMap.get("provider_url")
97
- transMap["provider_id"] = transMap["provider"] + "@" + titleMap.get("rawid")
98
- transMap["batch"] = BaseTime().get_beijin_date_strins("%Y%m%d") + "00"
99
- transMap["type"] = titleMap.get("source_type", "")
100
- transMap["medium"] = "2"
101
- transMap["country"] = titleMap.get("country", "")
102
- transMap["language"] = titleMap.get("language", "")
103
- transMap["is_deprecated"] = titleMap.get("is_deprecated", "")
104
- for field in self.fields:
105
- if field not in transMap.keys():
106
- transMap[field] = ""
107
- return transMap
108
-
109
- if __name__ == '__main__':
110
- t = TransformStandardTitleToZt()
111
- filePath = r"D:\Tencent\WorkWeChat\WXWork\1688853051796109\Cache\File\2022-01\a_bz_20210104.txt"
112
- insert_list = list()
113
- insert_db3_path = "./zt_wanfangstandard_00030_update_20220104.db3"
114
- import json,sqlite3
115
- import pandas as pd
116
- with open(filePath, "r", encoding="utf-8") as file_to_read:
117
- while True:
118
- fLine = file_to_read.readline()
119
- xx = fLine.strip()
120
- try:
121
- data = json.loads(xx)
122
- new_data = t.transform(data)
123
- insert_list.append(new_data)
124
- if len(insert_list) >= 1000:
125
- insert_conn = sqlite3.connect(insert_db3_path, check_same_thread=False)
126
- pd.DataFrame(insert_list).to_sql("modify_title_info_zt", insert_conn, if_exists='append',
127
- index=False)
128
- insert_list.clear()
129
- except:
130
- print(xx)
131
-
132
- if not fLine:
133
- break
134
-
135
-
1
+ # -*- coding:utf-8 -*-
2
+ # @Time : 2021/12/21 13:47
3
+ # @Author: suhong
4
+ # @File : TransformStandardTitleToZt.py
5
+ # @Function : 标准a层转智图
6
+ from re_common.baselibrary.utils.basetime import BaseTime
7
+
8
+ from re_common.facade.mysqlfacade import MysqlUtiles
9
+
10
+
11
+ class TransformStandardTitleToZt():
12
+ def __init__(self):
13
+ self.fields = [
14
+ "lngid",
15
+ "rawid",
16
+ "title",
17
+ "title_alternative",
18
+ "identifier_standard",
19
+ "identifier_doi",
20
+ "creator",
21
+ "creator_en",
22
+ "creator_institution",
23
+ "creator_release",
24
+ "date",
25
+ "date_impl",
26
+ "date_created",
27
+ "description",
28
+ "description_en",
29
+ "description_type",
30
+ "subject",
31
+ "subject_en",
32
+ "page",
33
+ "subject_clc",
34
+ "subject_esc",
35
+ "subject_csc",
36
+ "subject_isc",
37
+ "legal_status",
38
+ "language",
39
+ "country",
40
+ "type",
41
+ "provider",
42
+ "provider_url",
43
+ "provider_id",
44
+ "medium",
45
+ "batch",
46
+ "is_deprecated"
47
+ ]
48
+ self.zt_providermap = dict()
49
+ self.mysqlutils = MysqlUtiles(None, None, builder="MysqlBuilderForDicts", dicts={
50
+ "host": "192.168.31.24",
51
+ "user": "root",
52
+ "passwd": "vipdatacenter",
53
+ "db": "data_warehouse_sql",
54
+ "port": "3306",
55
+ "chartset": "utf8mb4",
56
+ })
57
+ self.get_zt_provider()
58
+
59
+ def get_zt_provider(self):
60
+ rows = self.mysqlutils.SelectFromDB(
61
+ "select sub_db_id,provider from a_transform_task where source_type = '5' and out_type = 'zt'")
62
+ for row in rows[1]:
63
+ self.zt_providermap[row[0]] = row[1]
64
+
65
+ def transform(self, titleMap):
66
+ transMap = dict()
67
+ sub_db_id = titleMap.get("sub_db_id", "")
68
+ transMap["lngid"] = titleMap.get("lngid", "")
69
+ transMap["rawid"] = titleMap.get("rawid", "")
70
+ transMap["title"] = titleMap.get("title", "")
71
+ transMap["title_alternative"] = titleMap.get("title_alt", "")
72
+ transMap["identifier_standard"] = titleMap.get("std_no", "")
73
+ transMap["identifier_doi"] = titleMap.get("doi", "")
74
+ transMap["creator"] = titleMap.get("author", "")
75
+ transMap["creator_en"] = titleMap.get("author_alt", "")
76
+ transMap["creator_institution"] = titleMap.get("organ", "")
77
+ transMap["creator_release"] = titleMap.get("publisher", "")
78
+ transMap["date"] = titleMap.get("pub_year", "")
79
+ transMap["date_impl"] = titleMap.get("impl_date", "")
80
+ transMap["date_created"] = titleMap.get("pub_date", "")
81
+ transMap["description"] = titleMap.get("abstract", "")
82
+ transMap["description_en"] = titleMap.get("abstract_alt", "")
83
+ transMap["description_type"] = titleMap.get("raw_type", "")
84
+ transMap["subject"] = titleMap.get("keyword", "")
85
+ transMap["subject_en"] = titleMap.get("keyword_alt", "")
86
+ transMap["page"] = titleMap.get("page_info", "")
87
+ transMap["pagecount"] = titleMap.get("page_cnt", "")
88
+ transMap["subject_clc"] = titleMap.get("clc_no", "")
89
+ transMap["subject_esc"] = titleMap.get("subject_edu", "")
90
+ transMap["subject_csc"] = titleMap.get("ccs_no", "")
91
+ transMap["subject_isc"] = titleMap.get("ics_no", "")
92
+ transMap["legal_status"] = titleMap.get("legal_status", "")
93
+ # transMap["provider"] = titleMap.get("zt_provider", "")
94
+ # if transMap["provider"] == "":
95
+ transMap["provider"] = self.zt_providermap[sub_db_id]
96
+ transMap["provider_url"] = transMap["provider"] + "@" + titleMap.get("provider_url")
97
+ transMap["provider_id"] = transMap["provider"] + "@" + titleMap.get("rawid")
98
+ transMap["batch"] = BaseTime().get_beijin_date_strins("%Y%m%d") + "00"
99
+ transMap["type"] = titleMap.get("source_type", "")
100
+ transMap["medium"] = "2"
101
+ transMap["country"] = titleMap.get("country", "")
102
+ transMap["language"] = titleMap.get("language", "")
103
+ transMap["is_deprecated"] = titleMap.get("is_deprecated", "")
104
+ for field in self.fields:
105
+ if field not in transMap.keys():
106
+ transMap[field] = ""
107
+ return transMap
108
+
109
+ if __name__ == '__main__':
110
+ t = TransformStandardTitleToZt()
111
+ filePath = r"D:\Tencent\WorkWeChat\WXWork\1688853051796109\Cache\File\2022-01\a_bz_20210104.txt"
112
+ insert_list = list()
113
+ insert_db3_path = "./zt_wanfangstandard_00030_update_20220104.db3"
114
+ import json,sqlite3
115
+ import pandas as pd
116
+ with open(filePath, "r", encoding="utf-8") as file_to_read:
117
+ while True:
118
+ fLine = file_to_read.readline()
119
+ xx = fLine.strip()
120
+ try:
121
+ data = json.loads(xx)
122
+ new_data = t.transform(data)
123
+ insert_list.append(new_data)
124
+ if len(insert_list) >= 1000:
125
+ insert_conn = sqlite3.connect(insert_db3_path, check_same_thread=False)
126
+ pd.DataFrame(insert_list).to_sql("modify_title_info_zt", insert_conn, if_exists='append',
127
+ index=False)
128
+ insert_list.clear()
129
+ except:
130
+ print(xx)
131
+
132
+ if not fLine:
133
+ break
134
+
135
+
@@ -1,135 +1,135 @@
1
- # -*- coding:utf-8 -*-
2
- # @Time : 2021/12/15 14:09
3
- # @Author: suhong
4
- # @File : TransformThesisTitleToZt.py
5
- # @Function :转换博硕a层到智图代码
6
- from re_common.baselibrary.utils.basetime import BaseTime
7
-
8
- from re_common.facade.mysqlfacade import MysqlUtiles
9
-
10
-
11
- class TransformThesisTitleToZt():
12
- def __init__(self):
13
- # 初始化fields
14
- self.fields = [
15
- "lngid",
16
- "rawid",
17
- "title",
18
- "title_alternative",
19
- "title_sub",
20
- "identifier_doi",
21
- "creator",
22
- "creator_en",
23
- "creator_bio",
24
- "creator_degree",
25
- "creator_discipline",
26
- "creator_institution",
27
- "contributor",
28
- "description",
29
- "description_en"
30
- "subject",
31
- "subject_en",
32
- "subject_clc",
33
- "subject_esc",
34
- "subject_dsa",
35
- "date",
36
- "date_created",
37
- "provider",
38
- "provider_url",
39
- "provider_id",
40
- "description_fund",
41
- "page",
42
- "beginpage",
43
- "endpage",
44
- "jumppage",
45
- "pagecount",
46
- "batch",
47
- "type",
48
- "rawtype",
49
- "medium",
50
- "language",
51
- "country",
52
- "provider_subject",
53
- "identifier_pisbn",
54
- "identifier_eisbn",
55
- "price",
56
- "pub_place",
57
- "is_deprecated"
58
- ]
59
- self.zt_providermap = dict()
60
- self.mysqlutils = MysqlUtiles(None, None, builder="MysqlBuilderForDicts", dicts={
61
- "host": "192.168.31.24",
62
- "user": "root",
63
- "passwd": "vipdatacenter",
64
- "db": "data_warehouse_sql",
65
- "port": "3306",
66
- "chartset": "utf8mb4",
67
- })
68
- self.get_zt_provider()
69
-
70
- def get_zt_provider(self):
71
- rows = self.mysqlutils.SelectFromDB(
72
- "select sub_db_id,provider from a_transform_task where source_type = '4' and out_type = 'zt'")
73
- for row in rows[1]:
74
- self.zt_providermap[row[0]] = row[1]
75
-
76
- def transform(self, titleMap):
77
- transMap = dict()
78
- sub_db_id = titleMap.get("sub_db_id", "")
79
- transMap["lngid"] = titleMap.get("lngid", "")
80
- transMap["rawid"] = titleMap.get("rawid", "")
81
- transMap["title"] = titleMap.get("title", "")
82
- transMap["title_alternative"] = titleMap.get("title_alt", "")
83
- transMap["title_sub"] = titleMap.get("title_sub", "")
84
- transMap["identifier_doi"] = titleMap.get("doi", "")
85
- transMap["creator"] = titleMap.get("author", "")
86
- transMap["creator_en"] = titleMap.get("author_alt", "")
87
- transMap["creator_bio"] = titleMap.get("author_intro", "")
88
- transMap["creator_degree"] = titleMap.get("degree", "")
89
- transMap["creator_discipline"] = titleMap.get("subject_major", "")
90
- if transMap["creator_discipline"] == "":
91
- transMap["creator_discipline"] = titleMap.get("subject_dsa", "")
92
- transMap["creator_institution"] = titleMap.get("organ", "")
93
- transMap["contributor"] = titleMap.get("contributor", "")
94
- transMap["description"] = titleMap.get("abstract", "")
95
- transMap["description_en"] = titleMap.get("abstract_alt", "")
96
- transMap["subject"] = titleMap.get("keyword", "")
97
- transMap["subject_en"] = titleMap.get("keyword_alt", "")
98
- transMap["subject_clc"] = titleMap.get("clc_no", "")
99
- transMap["subject_esc"] = titleMap.get("subject_edu", "")
100
- transMap["date"] = titleMap.get("pub_year", "")
101
- transMap["date_created"] = titleMap.get("pub_date", "")
102
- # transMap["provider"] = titleMap.get("zt_provider", "")
103
- # if transMap["provider"] == "":
104
- transMap["provider"] = self.zt_providermap[sub_db_id]
105
- transMap["provider_url"] = transMap["provider"] + "@" + titleMap.get("provider_url")
106
- transMap["provider_id"] = transMap["provider"] + "@" + titleMap.get("rawid")
107
- transMap["description_fund"] = titleMap.get("fund", "")
108
- transMap["page"] = titleMap.get("page_info", "")
109
- transMap["beginpage"] = titleMap.get("begin_page", "")
110
- transMap["endpage"] = titleMap.get("end_page", "")
111
- transMap["jumppage"] = titleMap.get("jump_page", "")
112
- transMap["pagecount"] = titleMap.get("page_cnt", "")
113
- transMap["batch"] = BaseTime().get_beijin_date_strins("%Y%m%d") + "00"
114
- transMap["type"] = titleMap.get("source_type", "")
115
- transMap["rawtype"] = titleMap.get("raw_type", "")
116
- transMap["medium"] = "2"
117
- transMap["country"] = titleMap.get("country", "")
118
- transMap["language"] = titleMap.get("language", "")
119
- transMap["provider_subject"] = titleMap.get("sub_db_class_name", "")
120
- transMap["identifier_pisbn"] = titleMap.get("isbn", "")
121
- transMap["identifier_eisbn"] = titleMap.get("eisbn", "")
122
- transMap["price"] = titleMap.get("price", "")
123
- transMap["pub_place"] = titleMap.get("pub_place", "")
124
- transMap["is_deprecated"] = titleMap.get("is_deprecated", "")
125
-
126
- for field in self.fields:
127
- if field not in transMap.keys():
128
- transMap[field] = ""
129
- return transMap
130
-
131
- if __name__ == '__main__':
132
- t = TransformThesisTitleToZt()
133
-
134
-
135
-
1
+ # -*- coding:utf-8 -*-
2
+ # @Time : 2021/12/15 14:09
3
+ # @Author: suhong
4
+ # @File : TransformThesisTitleToZt.py
5
+ # @Function :转换博硕a层到智图代码
6
+ from re_common.baselibrary.utils.basetime import BaseTime
7
+
8
+ from re_common.facade.mysqlfacade import MysqlUtiles
9
+
10
+
11
+ class TransformThesisTitleToZt():
12
+ def __init__(self):
13
+ # 初始化fields
14
+ self.fields = [
15
+ "lngid",
16
+ "rawid",
17
+ "title",
18
+ "title_alternative",
19
+ "title_sub",
20
+ "identifier_doi",
21
+ "creator",
22
+ "creator_en",
23
+ "creator_bio",
24
+ "creator_degree",
25
+ "creator_discipline",
26
+ "creator_institution",
27
+ "contributor",
28
+ "description",
29
+ "description_en"
30
+ "subject",
31
+ "subject_en",
32
+ "subject_clc",
33
+ "subject_esc",
34
+ "subject_dsa",
35
+ "date",
36
+ "date_created",
37
+ "provider",
38
+ "provider_url",
39
+ "provider_id",
40
+ "description_fund",
41
+ "page",
42
+ "beginpage",
43
+ "endpage",
44
+ "jumppage",
45
+ "pagecount",
46
+ "batch",
47
+ "type",
48
+ "rawtype",
49
+ "medium",
50
+ "language",
51
+ "country",
52
+ "provider_subject",
53
+ "identifier_pisbn",
54
+ "identifier_eisbn",
55
+ "price",
56
+ "pub_place",
57
+ "is_deprecated"
58
+ ]
59
+ self.zt_providermap = dict()
60
+ self.mysqlutils = MysqlUtiles(None, None, builder="MysqlBuilderForDicts", dicts={
61
+ "host": "192.168.31.24",
62
+ "user": "root",
63
+ "passwd": "vipdatacenter",
64
+ "db": "data_warehouse_sql",
65
+ "port": "3306",
66
+ "chartset": "utf8mb4",
67
+ })
68
+ self.get_zt_provider()
69
+
70
+ def get_zt_provider(self):
71
+ rows = self.mysqlutils.SelectFromDB(
72
+ "select sub_db_id,provider from a_transform_task where source_type = '4' and out_type = 'zt'")
73
+ for row in rows[1]:
74
+ self.zt_providermap[row[0]] = row[1]
75
+
76
+ def transform(self, titleMap):
77
+ transMap = dict()
78
+ sub_db_id = titleMap.get("sub_db_id", "")
79
+ transMap["lngid"] = titleMap.get("lngid", "")
80
+ transMap["rawid"] = titleMap.get("rawid", "")
81
+ transMap["title"] = titleMap.get("title", "")
82
+ transMap["title_alternative"] = titleMap.get("title_alt", "")
83
+ transMap["title_sub"] = titleMap.get("title_sub", "")
84
+ transMap["identifier_doi"] = titleMap.get("doi", "")
85
+ transMap["creator"] = titleMap.get("author", "")
86
+ transMap["creator_en"] = titleMap.get("author_alt", "")
87
+ transMap["creator_bio"] = titleMap.get("author_intro", "")
88
+ transMap["creator_degree"] = titleMap.get("degree", "")
89
+ transMap["creator_discipline"] = titleMap.get("subject_major", "")
90
+ if transMap["creator_discipline"] == "":
91
+ transMap["creator_discipline"] = titleMap.get("subject_dsa", "")
92
+ transMap["creator_institution"] = titleMap.get("organ", "")
93
+ transMap["contributor"] = titleMap.get("contributor", "")
94
+ transMap["description"] = titleMap.get("abstract", "")
95
+ transMap["description_en"] = titleMap.get("abstract_alt", "")
96
+ transMap["subject"] = titleMap.get("keyword", "")
97
+ transMap["subject_en"] = titleMap.get("keyword_alt", "")
98
+ transMap["subject_clc"] = titleMap.get("clc_no", "")
99
+ transMap["subject_esc"] = titleMap.get("subject_edu", "")
100
+ transMap["date"] = titleMap.get("pub_year", "")
101
+ transMap["date_created"] = titleMap.get("pub_date", "")
102
+ # transMap["provider"] = titleMap.get("zt_provider", "")
103
+ # if transMap["provider"] == "":
104
+ transMap["provider"] = self.zt_providermap[sub_db_id]
105
+ transMap["provider_url"] = transMap["provider"] + "@" + titleMap.get("provider_url")
106
+ transMap["provider_id"] = transMap["provider"] + "@" + titleMap.get("rawid")
107
+ transMap["description_fund"] = titleMap.get("fund", "")
108
+ transMap["page"] = titleMap.get("page_info", "")
109
+ transMap["beginpage"] = titleMap.get("begin_page", "")
110
+ transMap["endpage"] = titleMap.get("end_page", "")
111
+ transMap["jumppage"] = titleMap.get("jump_page", "")
112
+ transMap["pagecount"] = titleMap.get("page_cnt", "")
113
+ transMap["batch"] = BaseTime().get_beijin_date_strins("%Y%m%d") + "00"
114
+ transMap["type"] = titleMap.get("source_type", "")
115
+ transMap["rawtype"] = titleMap.get("raw_type", "")
116
+ transMap["medium"] = "2"
117
+ transMap["country"] = titleMap.get("country", "")
118
+ transMap["language"] = titleMap.get("language", "")
119
+ transMap["provider_subject"] = titleMap.get("sub_db_class_name", "")
120
+ transMap["identifier_pisbn"] = titleMap.get("isbn", "")
121
+ transMap["identifier_eisbn"] = titleMap.get("eisbn", "")
122
+ transMap["price"] = titleMap.get("price", "")
123
+ transMap["pub_place"] = titleMap.get("pub_place", "")
124
+ transMap["is_deprecated"] = titleMap.get("is_deprecated", "")
125
+
126
+ for field in self.fields:
127
+ if field not in transMap.keys():
128
+ transMap[field] = ""
129
+ return transMap
130
+
131
+ if __name__ == '__main__':
132
+ t = TransformThesisTitleToZt()
133
+
134
+
135
+
@@ -1,11 +1,11 @@
1
- # -*- coding:utf-8 -*-
2
- # @Time : 2021/12/2 9:38
3
- # @Author: suhong
4
- # @File : __init__.py.py
5
- # @Function :
6
-
7
- from re_common.baselibrary.utils.basetime import BaseTime
8
-
9
-
10
- x = BaseTime().get_beijin_date_strins("%Y%m%d") + "00"
1
+ # -*- coding:utf-8 -*-
2
+ # @Time : 2021/12/2 9:38
3
+ # @Author: suhong
4
+ # @File : __init__.py.py
5
+ # @Function :
6
+
7
+ from re_common.baselibrary.utils.basetime import BaseTime
8
+
9
+
10
+ x = BaseTime().get_beijin_date_strins("%Y%m%d") + "00"
11
11
  print(x)