re-common 2.0.0__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. re_common/v2/baselibrary/tools/search_hash_tools.py +33 -0
  2. re_common/v2/baselibrary/tools/text_matcher.py +223 -0
  3. re_common/v2/baselibrary/utils/BusinessStringUtil.py +2 -2
  4. re_common/v2/baselibrary/utils/author_smi.py +308 -0
  5. re_common/v2/baselibrary/utils/string_clear.py +15 -1
  6. re_common/v2/baselibrary/utils/stringutils.py +36 -1
  7. {re_common-2.0.0.dist-info → re_common-2.0.1.dist-info}/METADATA +1 -1
  8. re_common-2.0.1.dist-info/RECORD +25 -0
  9. re_common/baselibrary/__init__.py +0 -4
  10. re_common/baselibrary/baseabs/__init__.py +0 -7
  11. re_common/baselibrary/baseabs/baseabs.py +0 -26
  12. re_common/baselibrary/database/mbuilder.py +0 -132
  13. re_common/baselibrary/database/moudle.py +0 -93
  14. re_common/baselibrary/database/msqlite3.py +0 -194
  15. re_common/baselibrary/database/mysql.py +0 -169
  16. re_common/baselibrary/database/sql_factory.py +0 -26
  17. re_common/baselibrary/mthread/MThreadingRun.py +0 -486
  18. re_common/baselibrary/mthread/MThreadingRunEvent.py +0 -349
  19. re_common/baselibrary/mthread/__init__.py +0 -3
  20. re_common/baselibrary/mthread/mythreading.py +0 -695
  21. re_common/baselibrary/pakge_other/__init__.py +0 -0
  22. re_common/baselibrary/pakge_other/socks.py +0 -404
  23. re_common/baselibrary/readconfig/__init__.py +0 -0
  24. re_common/baselibrary/readconfig/config_factory.py +0 -18
  25. re_common/baselibrary/readconfig/ini_config.py +0 -317
  26. re_common/baselibrary/readconfig/toml_config.py +0 -49
  27. re_common/baselibrary/temporary/__init__.py +0 -0
  28. re_common/baselibrary/temporary/envdata.py +0 -36
  29. re_common/baselibrary/tools/__init__.py +0 -0
  30. re_common/baselibrary/tools/all_requests/__init__.py +0 -0
  31. re_common/baselibrary/tools/all_requests/aiohttp_request.py +0 -118
  32. re_common/baselibrary/tools/all_requests/httpx_requet.py +0 -102
  33. re_common/baselibrary/tools/all_requests/mrequest.py +0 -412
  34. re_common/baselibrary/tools/all_requests/requests_request.py +0 -81
  35. re_common/baselibrary/tools/batch_compre/__init__.py +0 -0
  36. re_common/baselibrary/tools/batch_compre/bijiao_batch.py +0 -31
  37. re_common/baselibrary/tools/contrast_db3.py +0 -123
  38. re_common/baselibrary/tools/copy_file.py +0 -39
  39. re_common/baselibrary/tools/db3_2_sizedb3.py +0 -102
  40. re_common/baselibrary/tools/foreachgz.py +0 -40
  41. re_common/baselibrary/tools/get_attr.py +0 -11
  42. re_common/baselibrary/tools/image_to_pdf.py +0 -62
  43. re_common/baselibrary/tools/java_code_deal.py +0 -139
  44. re_common/baselibrary/tools/javacode.py +0 -79
  45. re_common/baselibrary/tools/mdb_db3.py +0 -48
  46. re_common/baselibrary/tools/merge_file.py +0 -171
  47. re_common/baselibrary/tools/merge_gz_file.py +0 -165
  48. re_common/baselibrary/tools/mhdfstools/__init__.py +0 -0
  49. re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +0 -42
  50. re_common/baselibrary/tools/mhdfstools/hdfst.py +0 -42
  51. re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +0 -38
  52. re_common/baselibrary/tools/mongo_tools.py +0 -50
  53. re_common/baselibrary/tools/move_file.py +0 -170
  54. re_common/baselibrary/tools/move_mongo/__init__.py +0 -0
  55. re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +0 -63
  56. re_common/baselibrary/tools/move_mongo/move_mongo_table.py +0 -354
  57. re_common/baselibrary/tools/move_mongo/use_mttf.py +0 -18
  58. re_common/baselibrary/tools/move_mongo/use_mv.py +0 -93
  59. re_common/baselibrary/tools/mpandas/__init__.py +0 -0
  60. re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +0 -125
  61. re_common/baselibrary/tools/mpandas/pandas_visualization.py +0 -8
  62. re_common/baselibrary/tools/myparsel.py +0 -104
  63. re_common/baselibrary/tools/rename_dir_file.py +0 -37
  64. re_common/baselibrary/tools/sequoiadb_utils.py +0 -398
  65. re_common/baselibrary/tools/split_line_to_many.py +0 -25
  66. re_common/baselibrary/tools/stringtodicts.py +0 -33
  67. re_common/baselibrary/tools/workwechant_bot.py +0 -84
  68. re_common/baselibrary/utils/__init__.py +0 -0
  69. re_common/baselibrary/utils/baseaiohttp.py +0 -296
  70. re_common/baselibrary/utils/baseaiomysql.py +0 -87
  71. re_common/baselibrary/utils/baseallstep.py +0 -191
  72. re_common/baselibrary/utils/baseavro.py +0 -19
  73. re_common/baselibrary/utils/baseboto3.py +0 -291
  74. re_common/baselibrary/utils/basecsv.py +0 -32
  75. re_common/baselibrary/utils/basedict.py +0 -133
  76. re_common/baselibrary/utils/basedir.py +0 -241
  77. re_common/baselibrary/utils/baseencode.py +0 -351
  78. re_common/baselibrary/utils/baseencoding.py +0 -29
  79. re_common/baselibrary/utils/baseesdsl.py +0 -86
  80. re_common/baselibrary/utils/baseexcel.py +0 -264
  81. re_common/baselibrary/utils/baseexcept.py +0 -109
  82. re_common/baselibrary/utils/basefile.py +0 -654
  83. re_common/baselibrary/utils/baseftp.py +0 -214
  84. re_common/baselibrary/utils/basegzip.py +0 -60
  85. re_common/baselibrary/utils/basehdfs.py +0 -135
  86. re_common/baselibrary/utils/basehttpx.py +0 -268
  87. re_common/baselibrary/utils/baseip.py +0 -87
  88. re_common/baselibrary/utils/basejson.py +0 -2
  89. re_common/baselibrary/utils/baselist.py +0 -32
  90. re_common/baselibrary/utils/basemotor.py +0 -190
  91. re_common/baselibrary/utils/basemssql.py +0 -98
  92. re_common/baselibrary/utils/baseodbc.py +0 -113
  93. re_common/baselibrary/utils/basepandas.py +0 -302
  94. re_common/baselibrary/utils/basepeewee.py +0 -11
  95. re_common/baselibrary/utils/basepika.py +0 -180
  96. re_common/baselibrary/utils/basepydash.py +0 -143
  97. re_common/baselibrary/utils/basepymongo.py +0 -230
  98. re_common/baselibrary/utils/basequeue.py +0 -22
  99. re_common/baselibrary/utils/baserar.py +0 -57
  100. re_common/baselibrary/utils/baserequest.py +0 -279
  101. re_common/baselibrary/utils/baseset.py +0 -8
  102. re_common/baselibrary/utils/basesmb.py +0 -403
  103. re_common/baselibrary/utils/basestring.py +0 -382
  104. re_common/baselibrary/utils/basetime.py +0 -320
  105. re_common/baselibrary/utils/basetuple.py +0 -0
  106. re_common/baselibrary/utils/baseurl.py +0 -121
  107. re_common/baselibrary/utils/basezip.py +0 -57
  108. re_common/baselibrary/utils/core/__init__.py +0 -8
  109. re_common/baselibrary/utils/core/bottomutils.py +0 -18
  110. re_common/baselibrary/utils/core/mdeprecated.py +0 -327
  111. re_common/baselibrary/utils/core/mlamada.py +0 -16
  112. re_common/baselibrary/utils/core/msginfo.py +0 -25
  113. re_common/baselibrary/utils/core/requests_core.py +0 -103
  114. re_common/baselibrary/utils/fateadm.py +0 -429
  115. re_common/baselibrary/utils/importfun.py +0 -123
  116. re_common/baselibrary/utils/mfaker.py +0 -57
  117. re_common/baselibrary/utils/my_abc/__init__.py +0 -3
  118. re_common/baselibrary/utils/my_abc/better_abc.py +0 -32
  119. re_common/baselibrary/utils/mylogger.py +0 -414
  120. re_common/baselibrary/utils/myredisclient.py +0 -861
  121. re_common/baselibrary/utils/pipupgrade.py +0 -21
  122. re_common/baselibrary/utils/ringlist.py +0 -85
  123. re_common/baselibrary/utils/version_compare.py +0 -36
  124. re_common/baselibrary/utils/ydmhttp.py +0 -126
  125. re_common/facade/__init__.py +0 -1
  126. re_common/facade/lazy_import.py +0 -11
  127. re_common/facade/loggerfacade.py +0 -25
  128. re_common/facade/mysqlfacade.py +0 -467
  129. re_common/facade/now.py +0 -31
  130. re_common/facade/sqlite3facade.py +0 -257
  131. re_common/facade/use/__init__.py +0 -0
  132. re_common/facade/use/mq_use_facade.py +0 -83
  133. re_common/facade/use/proxy_use_facade.py +0 -20
  134. re_common/libtest/__init__.py +0 -0
  135. re_common/libtest/base_dict_test.py +0 -19
  136. re_common/libtest/baseavro_test.py +0 -13
  137. re_common/libtest/basefile_test.py +0 -14
  138. re_common/libtest/basemssql_test.py +0 -77
  139. re_common/libtest/baseodbc_test.py +0 -8
  140. re_common/libtest/basepandas_test.py +0 -38
  141. re_common/libtest/get_attr_test/__init__.py +0 -0
  142. re_common/libtest/get_attr_test/get_attr_test_settings.py +0 -14
  143. re_common/libtest/get_attr_test/settings.py +0 -55
  144. re_common/libtest/idencode_test.py +0 -54
  145. re_common/libtest/iniconfig_test.py +0 -35
  146. re_common/libtest/ip_test.py +0 -35
  147. re_common/libtest/merge_file_test.py +0 -20
  148. re_common/libtest/mfaker_test.py +0 -9
  149. re_common/libtest/mm3_test.py +0 -32
  150. re_common/libtest/mylogger_test.py +0 -89
  151. re_common/libtest/myparsel_test.py +0 -28
  152. re_common/libtest/mysql_test.py +0 -151
  153. re_common/libtest/pymongo_test.py +0 -21
  154. re_common/libtest/split_test.py +0 -12
  155. re_common/libtest/sqlite3_merge_test.py +0 -6
  156. re_common/libtest/sqlite3_test.py +0 -34
  157. re_common/libtest/tomlconfig_test.py +0 -30
  158. re_common/libtest/use_tools_test/__init__.py +0 -3
  159. re_common/libtest/user/__init__.py +0 -5
  160. re_common/studio/__init__.py +0 -5
  161. re_common/studio/assignment_expressions.py +0 -37
  162. re_common/studio/mydash/__init__.py +0 -0
  163. re_common/studio/mydash/test1.py +0 -19
  164. re_common/studio/pydashstudio/__init__.py +0 -0
  165. re_common/studio/pydashstudio/first.py +0 -9
  166. re_common/studio/streamlitstudio/__init__.py +0 -0
  167. re_common/studio/streamlitstudio/first_app.py +0 -66
  168. re_common/studio/streamlitstudio/uber_pickups.py +0 -24
  169. re_common/studio/test.py +0 -19
  170. re_common/vip/__init__.py +0 -0
  171. re_common/vip/base_step_process.py +0 -11
  172. re_common/vip/baseencodeid.py +0 -91
  173. re_common/vip/changetaskname.py +0 -28
  174. re_common/vip/core_var.py +0 -24
  175. re_common/vip/mmh3Hash.py +0 -90
  176. re_common/vip/proxy/__init__.py +0 -0
  177. re_common/vip/proxy/allproxys.py +0 -127
  178. re_common/vip/proxy/allproxys_thread.py +0 -159
  179. re_common/vip/proxy/cnki_proxy.py +0 -153
  180. re_common/vip/proxy/kuaidaili.py +0 -87
  181. re_common/vip/proxy/proxy_all.py +0 -113
  182. re_common/vip/proxy/update_kuaidaili_0.py +0 -42
  183. re_common/vip/proxy/wanfang_proxy.py +0 -152
  184. re_common/vip/proxy/wp_proxy_all.py +0 -182
  185. re_common/vip/read_rawid_to_txt.py +0 -92
  186. re_common/vip/title/__init__.py +0 -5
  187. re_common/vip/title/transform/TransformBookTitleToZt.py +0 -125
  188. re_common/vip/title/transform/TransformConferenceTitleToZt.py +0 -139
  189. re_common/vip/title/transform/TransformCstadTitleToZt.py +0 -196
  190. re_common/vip/title/transform/TransformJournalTitleToZt.py +0 -203
  191. re_common/vip/title/transform/TransformPatentTitleToZt.py +0 -132
  192. re_common/vip/title/transform/TransformRegulationTitleToZt.py +0 -114
  193. re_common/vip/title/transform/TransformStandardTitleToZt.py +0 -135
  194. re_common/vip/title/transform/TransformThesisTitleToZt.py +0 -135
  195. re_common/vip/title/transform/__init__.py +0 -11
  196. re_common-2.0.0.dist-info/RECORD +0 -209
  197. /re_common/{baselibrary/database/__init__.py → v2/baselibrary/tools/list_tools.py} +0 -0
  198. {re_common-2.0.0.dist-info → re_common-2.0.1.dist-info}/LICENSE +0 -0
  199. {re_common-2.0.0.dist-info → re_common-2.0.1.dist-info}/WHEEL +0 -0
  200. {re_common-2.0.0.dist-info → re_common-2.0.1.dist-info}/top_level.txt +0 -0
@@ -1,196 +0,0 @@
1
- # -*- coding:utf-8 -*-
2
- # @Time : 2021/12/19 18:17
3
- # @Author: suhong
4
- # @File : TransformCstadTitleToZt.py
5
- # @Function : 成果a层转智图
6
- from re_common.baselibrary.utils.basetime import BaseTime
7
-
8
- from re_common.facade.mysqlfacade import MysqlUtiles
9
-
10
-
11
class TransformCstadTitleToZt():
    """Convert an achievement (source_type '9') a-layer record to the zt layout.

    On construction the converter loads the ``sub_db_id -> provider`` mapping
    from the ``a_transform_task`` table; ``transform`` then renames the a-layer
    keys of one record to the zt field names listed in ``self.fields``.
    """

    # Output fields; any of them that ``transform`` does not set is emitted as "".
    _FIELDS = (
        "lngid",
        "rawid",
        "title",
        "title_alternative",
        "title_series",
        "title_edition",
        "identifier_doi",
        "creator",
        "creator_en",
        "creator_institution",
        "creator_bio",
        "description",
        "description_en",
        "subject",
        "subject_en",
        "date",
        "date_created",
        "subject_clc",
        "subject_esc",
        "description_type",
        "language",
        "country",
        "type",
        "provider",
        "provider_url",
        "provider_id",
        "medium",
        "batch",
        "is_deprecated",
    )

    # (zt field, a-layer key) pairs copied verbatim, in output order.
    _LEADING_PAIRS = (
        ("lngid", "lngid"),
        ("rawid", "rawid"),
        ("title", "title"),
        ("title_alternative", "title_alt"),
        ("title_series", "title_series"),
        ("title_edition", "revision"),
        ("identifier_doi", "doi"),
        ("creator", "author"),
        ("creator_en", "author_alt"),
        ("creator_institution", "organ"),
        ("creator_bio", "author_intro"),
        ("description", "abstract"),
        ("description_en", "abstract_alt"),
        ("subject", "keyword"),
        ("subject_en", "keyword_alt"),
        ("date", "pub_year"),
        ("date_created", "pub_date"),
        ("subject_clc", "clc_no"),
        ("subject_esc", "subject_edu"),
        ("description_type", "raw_type"),
    )

    # NOTE(review): security - a root password is committed to source here.
    # It is kept only so the no-argument constructor behaves as before; pass
    # ``db_config`` instead and move these values to secret storage.
    _DEFAULT_DB_CONFIG = {
        "host": "192.168.31.24",
        "user": "root",
        "passwd": "vipdatacenter",
        "db": "data_warehouse_sql",
        "port": "3306",
        # NOTE(review): "chartset" looks like a typo for "charset"; left
        # untouched because the key read by the builder is not visible here.
        "chartset": "utf8mb4",
    }

    def __init__(self, db_config=None):
        """Open the MySQL helper and load the provider mapping.

        :param db_config: optional dict of connection settings handed to
            ``MysqlUtiles``; defaults to the legacy built-in settings.
        """
        self.fields = list(self._FIELDS)
        self.zt_providermap = dict()
        settings = self._DEFAULT_DB_CONFIG if db_config is None else db_config
        self.mysqlutils = MysqlUtiles(None, None, builder="MysqlBuilderForDicts",
                                      dicts=dict(settings))
        self.get_zt_provider()

    def get_zt_provider(self):
        """Fill ``self.zt_providermap`` with ``sub_db_id -> provider`` rows."""
        rows = self.mysqlutils.SelectFromDB(
            "select sub_db_id,provider from a_transform_task where source_type = '9' and out_type = 'zt'")
        # The second element of the result is iterated as the row sequence.
        for row in rows[1]:
            self.zt_providermap[row[0]] = row[1]

    def transform(self, titleMap, batch=None):
        """Return the zt-layout dict for one a-layer record.

        :param titleMap: dict holding the a-layer record.
        :param batch: optional batch stamp to write into ``"batch"``; when
            omitted it is derived from ``BaseTime`` as before. Passing it lets
            a caller stamp a whole run with one value.
        :raises KeyError: if the record's ``sub_db_id`` has no provider entry.
        """
        sub_db_id = titleMap.get("sub_db_id", "")
        transMap = {target: titleMap.get(source, "")
                    for target, source in self._LEADING_PAIRS}

        provider = self.zt_providermap[sub_db_id]
        transMap["provider"] = provider
        # Default to "" like every other field: a record without these keys
        # used to fail with TypeError (str + None).
        transMap["provider_url"] = provider + "@" + titleMap.get("provider_url", "")
        transMap["provider_id"] = provider + "@" + titleMap.get("rawid", "")
        if batch is None:
            batch = BaseTime().get_beijin_date_strins("%Y%m%d") + "00"
        transMap["batch"] = batch
        transMap["type"] = titleMap.get("source_type", "")
        transMap["medium"] = "2"
        for key in ("country", "language", "is_deprecated"):
            transMap[key] = titleMap.get(key, "")

        # Guarantee every declared output field exists.
        for field in self.fields:
            transMap.setdefault(field, "")
        return transMap
101
-
102
if __name__ == "__main__":
    # Manual smoke run: builds the converter (which queries MySQL in its
    # constructor) and prints the converted form of one sample record.
    t = TransformCstadTitleToZt()
    data = {
        "_id": "00275FCILN8O9JP1MPDO8IP067BG0ILZ",
        "applicant_organ": "",
        "sub_db": "SNAD",
        "country": "CN",
        "register_no": "",
        "clc_machine": "",
        "subject": "",
        "rawid_alt": "",
        "recommend_organ_code": "",
        "transfer_terms": "",
        "language": "ZH",
        "from_to_date": "2016-10~2018-09",
        "investment_amount": "",
        "std_no": "",
        "latest_date": "20210120",
        "clc_no": "R472.2",
        "app_date": "",
        "keyword": "ICU;获得性;干预策略;集束化",
        "investment_explain": "",
        "transfer_annotation": "",
        "fax": "",
        "save_money": "",
        "patent_cnt": "",
        "lngid": "00275FCILN8O9JP1MPDO8IP067BG0ILZ",
        "level": "",
        "author": "徐玲芬;谢波;周庆;姬晓伟;李敏;钟玉英;钟瑞英;姜勤;刘晓博",
        "sub_db_id": "00275",
        "postcode": "",
        "batch": "20201010_103123;20210607_085602",
        "keyid": "00275FCILN8O9JP1MPDO8IP067BG0ILZ",
        "down_date": "20201010;20201010",
        "source_type": "9",
        "tax": "",
        "provider_url": "https://kns.cnki.net/kcms/detail/detail.aspx?dbname=SNAD&filename=SNAD000001839999",
        "transfer_scope": "",
        "plan_name": "",
        "organ_alt": "",
        "vision": "1",
        "subject_edu": "320.71",
        "rawid": "SNAD000001839999",
        "subject_word": "",
        "investment_annotation": "",
        "trade_no": "",
        "author_id": "",
        "build_duration": "",
        "organ_area": "",
        "organ": "湖州市中心医院",
        "recommend_no": "",
        "authorization_no": "",
        "corr_organ_addr": "",
        "spread_explain": "",
        "title": "基于循证构建ICU获得性衰弱集束化干预策略及其应用研究",
        "keyword_alt": "",
        "trade_name": "",
        "identify_organ": "",
        "abstract_alt": "",
        "pub_date": "20200000",
        "provider": "CNKI",
        "pub_year": "2020",
        "fulltext_type": "",
        "corr_author": "",
        "recommend_date": "",
        "clc_no_1st": "R472.2",
        "email": "",
        "product": "CNKI",
        "keyword_machine": "",
        "recommend_organ": "",
        "earn_foreign": "",
        "identify_date": "",
        "spread_form": "",
        "register_date": "",
        "evaluation_form": "验收",
        "abstract": "一、主要研究内容 1.基于循证理论,探索目前适合我国对ICU获得性衰弱患者有效的集束化干预策略。 2.研究本策略对提高ICU获得性衰弱患者肌力、改善生活自理能力、缩短ICU入住时间、住院时间及机械通气时间等的影响。 3.建立多学科医护团队,培养一支能够改善ICU获得性衰弱患者预后的优良医护队伍。 二、主要创新点 1.该策略的制定是在循证的基础上,经过相关领域知名专家的二轮的论证而确立,具有较强的科学性和实用性。将该干预策略应用于临床实践,真正解决临床实际问题。 2.集束化干预策略的应用,优化了传统单一护理措施的模式,将循证所得的综合措施应用于临床护理实践,为患者提供优质的护理服务,也为从事ICU临床护理工作的同行提供了可借鉴的经验。 3.本研究涉及多学科医护专业人员,通过彼此的合作,旨在培养一支具有良好业务素质、能够有效改善ICUAW患者预后的优良的医护团队。 三、主要技术、经济指标(执行期内和产业化阶段的要分开) 1.拟达到的主要技术指标:①提高ICU获得性衰弱患者肌力、生活自理能力、缩短ICU获得性衰弱患者住院时间,缩短患者机械通气天数。②促进患者疾病康复,为临床培养一支能够改善ICU获得性衰弱患者疾病预后的优良医护团队。③在核心期刊发表高质量研究论文2篇。 2.主要经济指标:ICU获得性衰弱集束化干预策略的实施,可缩短ICU获得性衰弱患者入住ICU时间及住院时间、有效降低患者医疗费用、节约医疗资源,具有显著的经济效益。",
        "fulltext_addr": "",
        "transfer_content": "",
        "register_organ": "",
        "transfer_fee": "",
        "corr_organ": "",
        "accept_date": "20200000",
        "is_deprecated": "0",
        "title_alt": "",
        "raw_type": "应用技术",
        "restricted": "",
        "transfer_form": "",
        "spread_scope": "",
        "spread_track": "",
        "output_value": "",
        "app_no": "",
        "author_alt": "",
        "organ_id": "",
    }
    print(t.transform(data))
@@ -1,203 +0,0 @@
1
- # -*- coding:utf-8 -*-
2
- # @Time : 2021/12/2 9:39
3
- # @Author: suhong
4
- # @File : TransformJournalTitleToZt.py
5
- # @Function : 转换期刊a层到智图代码
6
- from boto3 import Session
7
- from re_common.facade.mysqlfacade import MysqlUtiles
8
- from xpinyin import Pinyin
9
- from re_common.baselibrary.utils.basetime import BaseTime
10
-
11
-
12
class TransformJournalTitleToZt():
    """Convert a journal (source_type '3') a-layer record to the zt layout.

    On construction the converter downloads the gch lookup files from S3,
    loads the ``sub_db_id -> provider`` mapping from MySQL and creates a
    ``Pinyin`` helper; ``transform`` then maps one record to zt field names.
    """

    # Output fields; any of them that ``transform`` does not set is emitted as "".
    _FIELDS = (
        "lngid",
        "rawid",
        "gch",
        "title",
        "title_alternative",
        "title_series",
        "identifier_issn",
        "identifier_cnno",
        "creator",
        "creator_en",
        "creator_institution",
        "source",
        "source_en",
        "date",
        "volume",
        "issue",
        "description",
        "description_en",
        "description_fund",
        "description_core",
        "subject",
        "subject_en",
        "beginpage",
        "endpage",
        "page",
        "subject_clc",
        "date_created",
        "identifier_doi",
        "country",
        "language",
        "provider",
        "owner",
        "type",
        "medium",
        "batch",
        "provider_url",
        "provider_id",
        "if_pub1st",
        "provider_jid",
        "rawtype",
        "creator_bio",
        "cited_cnt",
        "source_id",
        "identifier_eisbn",
        "publisher",
        "jumppage",
        "identifier_pissn",
        "pagecount",
        "ref_cnt",
        "provider_subject",
        "identifier_eissn",
        "is_deprecated",
    )

    # (zt field, a-layer key) pairs, grouped by where they appear in the output.
    _TITLE_PAIRS = (
        ("lngid", "lngid"),
        ("rawid", "rawid"),
        ("title", "title"),
        ("title_series", "column_info"),
        ("title_alternative", "title_alt"),
        ("identifier_issn", "issn"),
        ("identifier_cnno", "cnno"),
    )
    _CREATOR_PAIRS = (
        ("creator", "author"),
        ("creator_en", "author_alt"),
        ("creator_institution", "organ"),
        ("source", "journal_name"),
        ("source_en", "journal_name_alt"),
    )
    _BODY_PAIRS = (
        ("date", "pub_year"),
        ("volume", "vol"),
        ("issue", "num"),
        ("description", "abstract"),
        ("description_en", "abstract_alt"),
        ("description_fund", "fund"),
        ("description_core", "range"),
        ("subject", "keyword"),
        ("subject_en", "keyword_alt"),
        ("subject_clc_g1", "clc_no_1st"),
        ("beginpage", "begin_page"),
        ("endpage", "end_page"),
        ("jumppage", "jump_page"),
        ("page", "page_info"),
        ("subject_clc", "clc_no"),
    )
    _LOCALE_PAIRS = (
        ("identifier_doi", "doi"),
        ("country", "country"),
        ("language", "language"),
    )
    _TRAILING_PAIRS = (
        ("rawtype", "raw_type"),
        ("creator_bio", "author_intro"),
        ("cited_cnt", "cited_cnt"),
        ("identifier_eisbn", "isbn"),
        ("publisher", "publisher"),
        ("identifier_pissn", "issn"),
        ("pagecount", "page_cnt"),
        ("ref_cnt", "ref_cnt"),
        ("provider_subject", "subject"),
        ("identifier_eissn", "eissn"),
        ("is_deprecated", "is_deprecated"),
    )

    # Which gch lookup file serves each sub-db, in the original load order.
    _GCH_BUCKET = 'temp.dc.cqvip.com'
    _GCH_FILE_BY_SUB_DB = (
        ("00002", "suhong/gchmap/bidgch.txt"),
        ("00393", "suhong/gchmap/bidgch.txt"),
        ("00004", "suhong/gchmap/qidgch.txt"),
        ("00006", "suhong/gchmap/cidgch.txt"),
        ("00169", "suhong/gchmap/bidgch.txt"),
        ("00451", "suhong/gchmap/bidgch.txt"),
        ("00452", "suhong/gchmap/bidgch.txt"),
        ("00288", "suhong/gchmap/qidgch.txt"),
    )

    # NOTE(review): security - S3 keys and a root DB password are committed to
    # source. They are kept only so the no-argument constructor behaves as
    # before; pass the constructor arguments instead and rotate these values.
    _DEFAULT_ENDPOINT = 'http://192.168.31.31:9000'
    _DEFAULT_ACCESS_KEY = 'KBWMHTMFTRF1PUT18O93'
    _DEFAULT_SECRET_KEY = 'ABSceGSwxIii2f+WQsUEl+Im4u0p+F3wpODfCJ+H'
    _DEFAULT_DB_CONFIG = {
        "host": "192.168.31.24",
        "user": "root",
        "passwd": "vipdatacenter",
        "db": "data_warehouse_sql",
        "port": "3306",
        # NOTE(review): "chartset" looks like a typo for "charset"; left
        # untouched because the key read by the builder is not visible here.
        "chartset": "utf8mb4",
    }

    def __init__(self, db_config=None, endpoint=None, access_key=None, secret_key=None):
        """Load the gch maps and provider mapping, then create the pinyin helper.

        :param db_config: optional dict of connection settings handed to
            ``MysqlUtiles``; defaults to the legacy built-in settings.
        :param endpoint: optional S3 endpoint URL.
        :param access_key: optional S3 access key id.
        :param secret_key: optional S3 secret access key.
        """
        self.fields = list(self._FIELDS)
        self.BigGchMap = dict()
        self.zt_providermap = dict()
        self.param_endpoint = self._DEFAULT_ENDPOINT if endpoint is None else endpoint
        self.param_access_key = self._DEFAULT_ACCESS_KEY if access_key is None else access_key
        self.param_secret_key = self._DEFAULT_SECRET_KEY if secret_key is None else secret_key
        settings = self._DEFAULT_DB_CONFIG if db_config is None else db_config
        self.mysqlutils = MysqlUtiles(None, None, builder="MysqlBuilderForDicts",
                                      dicts=dict(settings))
        self.get_gch()
        self.get_zt_provider()
        self.p = Pinyin()

    def get_zt_provider(self):
        """Fill ``self.zt_providermap`` with ``sub_db_id -> provider`` rows."""
        rows = self.mysqlutils.SelectFromDB(
            "select sub_db_id,provider from a_transform_task where source_type = '3' and out_type = 'zt'")
        # The second element of the result is iterated as the row sequence.
        for row in rows[1]:
            self.zt_providermap[row[0]] = row[1]

    @staticmethod
    def _parse_gch_text(text):
        """Parse ``gch<TAB>raw_id`` lines into a ``raw_id -> gch`` dict."""
        mapping = dict()
        for line in text.split("\n"):
            columns = line.split("\t")
            # Blank lines and lines without a tab (e.g. a lone "\r" left by
            # CRLF endings) carry no pair; indexing them used to raise.
            if len(columns) < 2:
                continue
            gch = columns[0].replace("\r", "").replace(" ", "")
            r_id = columns[1].replace("\r", "").replace(" ", "")
            if gch == "#" or r_id == "#":
                continue
            mapping[r_id] = gch
        return mapping

    def get_gch(self, fetch_text=None):
        """Populate ``self.BigGchMap`` with one ``raw_id -> gch`` map per sub-db.

        Each lookup file is fetched and parsed once; sub-dbs that use the same
        file share the same parsed map. Previously a single dict was shared by
        every sub-db and accumulated the entries of all files.

        :param fetch_text: optional callable ``path -> str`` returning the
            content of a lookup file; defaults to reading the object from the
            configured S3 bucket and decoding it as UTF-8.
        """
        if fetch_text is None:
            session = Session(aws_access_key_id=self.param_access_key,
                              aws_secret_access_key=self.param_secret_key)
            s3 = session.resource('s3', endpoint_url=self.param_endpoint)
            bucket = s3.Bucket(self._GCH_BUCKET)

            def fetch_text(path):
                return bucket.Object(path).get()['Body'].read().decode('utf-8')

        parsed_by_path = dict()
        for sub_db, gchpath in self._GCH_FILE_BY_SUB_DB:
            if gchpath not in parsed_by_path:
                parsed_by_path[gchpath] = self._parse_gch_text(fetch_text(gchpath))
            self.BigGchMap[sub_db] = parsed_by_path[gchpath]

    def format_data_create(self, publishdate, years):
        """Return ``publishdate``, or ``years + "0000"`` when it is empty or in 1900."""
        if publishdate == "" or publishdate[0:4] == "1900":
            return years + "0000"
        return publishdate

    @staticmethod
    def _copy_fields(transMap, titleMap, pairs):
        """Copy each a-layer key to its zt field, defaulting to ""."""
        for target, source in pairs:
            transMap[target] = titleMap.get(source, "")

    def transform(self, titleMap, batch=None):
        """Return the zt-layout dict for one a-layer journal record.

        :param titleMap: dict holding the a-layer record.
        :param batch: optional batch stamp to write into ``"batch"``; when
            omitted it is derived from ``BaseTime`` as before. Passing it lets
            a caller stamp a whole run with one value.
        :raises KeyError: if the record's ``sub_db_id`` has no provider entry.
        """
        transMap = dict()
        journal_raw_id = titleMap.get("journal_raw_id", "")
        sub_db_id = titleMap.get("sub_db_id", "")

        self._copy_fields(transMap, titleMap, self._TITLE_PAIRS)
        # "@@" is treated as an empty alternative title.
        if transMap["title_alternative"] == "@@":
            transMap["title_alternative"] = ""

        if sub_db_id == "00001":
            transMap["gch"] = titleMap.get("gch", "")
        else:
            try:
                transMap["gch"] = self.BigGchMap[sub_db_id].get(journal_raw_id, "")
            except (KeyError, TypeError):
                # Unknown sub-db or unusable id: no gch, as before, but other
                # errors are no longer swallowed by a bare except.
                transMap["gch"] = ''

        self._copy_fields(transMap, titleMap, self._CREATOR_PAIRS)
        if transMap["source"] != "":
            py = self.p.get_pinyin(transMap["source"], '')
            # First character of the pinyin form; skipped if the result is empty.
            if py:
                transMap["source_fl"] = py[0]

        self._copy_fields(transMap, titleMap, self._BODY_PAIRS)
        transMap["date_created"] = self.format_data_create(
            titleMap.get("pub_date", ""), titleMap.get("pub_year", ""))
        self._copy_fields(transMap, titleMap, self._LOCALE_PAIRS)

        provider = self.zt_providermap[sub_db_id]
        transMap["provider"] = provider
        transMap["type"] = titleMap.get("source_type", "")
        transMap["medium"] = "2"
        if batch is None:
            batch = BaseTime().get_beijin_date_strins("%Y%m%d") + "00"
        transMap["batch"] = batch
        # Default to "" like every other field: a record without these keys
        # used to fail with TypeError (str + None).
        transMap["provider_url"] = provider + "@" + titleMap.get("provider_url", "")
        transMap["provider_id"] = provider + "@" + titleMap.get("rawid", "")
        transMap["if_pub1st"] = "1" if sub_db_id == "00393" else "0"
        transMap["provider_jid"] = provider + "@" + journal_raw_id

        self._copy_fields(transMap, titleMap, self._TRAILING_PAIRS)

        # Guarantee every declared output field exists.
        for field in self.fields:
            transMap.setdefault(field, "")
        return transMap
200
-
201
-
202
if __name__ == "__main__":
    # Manual smoke check: constructing the converter runs its S3 and MySQL
    # loading steps.
    t = TransformJournalTitleToZt()
@@ -1,132 +0,0 @@
1
- # -*- coding:utf-8 -*-
2
- # @Time : 2021/12/15 14:09
3
- # @Author: suhong
4
- # @File : TransformPatentTitleToZt.py
5
- # @Function :转换专利层到智图代码
6
- from re_common.baselibrary.utils.basetime import BaseTime
7
-
8
- from re_common.facade.mysqlfacade import MysqlUtiles
9
-
10
-
11
class TransformPantentTitleToZt():
    """Convert a patent (source_type '7') a-layer record to the zt layout.

    On construction the converter loads the ``sub_db_id -> provider`` mapping
    from the ``a_transform_task`` table; ``transform`` then renames the a-layer
    keys of one record to the zt field names listed in ``self.fields``.
    """

    # Output fields; any of them that ``transform`` does not set is emitted as "".
    _FIELDS = (
        "lngid",
        "rawid",
        "title",
        "title_alternative",
        "creator",
        "creator_en",
        "creator_bio",
        "creator_institution",
        "applicant",
        "date",
        "description",
        "description_en",
        "subject",
        "subject_en",
        "subject_clc",
        "subject_esc",
        "subject_isc",
        "subject_csc",
        "date_created",
        "agency",
        "agents",
        "description_core",
        "legal_status",
        "pct_app_data",
        "pct_enter_nation_date",
        "pct_pub_data",
        "priority_number",
        "identifier_pissn",
        "date_impl",
        "identifier_standard",
        "province_code",
        "page",
        "description_type",
        "cited_cnt",
        "language",
        "country",
        "type",
        "provider",
        "provider_url",
        "provider_id",
        "medium",
        "batch",
        "is_deprecated",
    )

    # (zt field, a-layer key) pairs copied verbatim, in output order.
    _LEADING_PAIRS = (
        ("lngid", "lngid"),
        ("rawid", "rawid"),
        ("title", "title"),
        ("title_alternative", "title_alt"),
        ("creator", "author"),
        ("creator_en", "author_alt"),
        ("creator_bio", "author_intro"),
        ("creator_institution", "applicant_addr"),
        ("applicant", "applicant"),
        ("date", "pub_year"),
        ("description", "abstract"),
        ("description_en", "abstract_alt"),
        ("subject", "keyword"),
        ("subject_en", "keyword_alt"),
        ("subject_clc", "clc_no"),
        ("subject_esc", "subject_edu"),
        ("subject_isc", "ipc_no"),
        ("subject_csc", "ipc_no_1st"),
        ("date_created", "pub_date"),
        ("agency", "agency"),
        ("agent", "agent"),
        ("description_core", "claim"),
        ("legal_status", "legal_status"),
        ("pct_app_data", "pct_app_data"),
        ("pct_enter_nation_date", "pct_enter_nation_date"),
        ("pct_pub_data", "pct_pub_data"),
        ("priority_number", "priority_no"),
        ("identifier_pissn", "app_no"),
        ("date_impl", "app_date"),
        ("identifier_standard", "pub_no"),
        ("province_code", "organ_area"),
        ("page", "page_info"),
        ("description_type", "raw_type"),
    )

    # NOTE(review): security - a root password is committed to source here.
    # It is kept only so the no-argument constructor behaves as before; pass
    # ``db_config`` instead and move these values to secret storage.
    _DEFAULT_DB_CONFIG = {
        "host": "192.168.31.24",
        "user": "root",
        "passwd": "vipdatacenter",
        "db": "data_warehouse_sql",
        "port": "3306",
        # NOTE(review): "chartset" looks like a typo for "charset"; left
        # untouched because the key read by the builder is not visible here.
        "chartset": "utf8mb4",
    }

    def __init__(self, db_config=None):
        """Open the MySQL helper and load the provider mapping.

        :param db_config: optional dict of connection settings handed to
            ``MysqlUtiles``; defaults to the legacy built-in settings.
        """
        self.fields = list(self._FIELDS)
        self.zt_providermap = dict()
        settings = self._DEFAULT_DB_CONFIG if db_config is None else db_config
        self.mysqlutils = MysqlUtiles(None, None, builder="MysqlBuilderForDicts",
                                      dicts=dict(settings))
        self.get_zt_provider()

    def get_zt_provider(self):
        """Fill ``self.zt_providermap`` with ``sub_db_id -> provider`` rows."""
        rows = self.mysqlutils.SelectFromDB(
            "select sub_db_id,provider from a_transform_task where source_type = '7' and out_type = 'zt'")
        # The second element of the result is iterated as the row sequence.
        for row in rows[1]:
            self.zt_providermap[row[0]] = row[1]

    def transform(self, titleMap, batch=None):
        """Return the zt-layout dict for one a-layer patent record.

        :param titleMap: dict holding the a-layer record.
        :param batch: optional batch stamp to write into ``"batch"``; when
            omitted it is derived from ``BaseTime`` as before. Passing it lets
            a caller stamp a whole run with one value.
        :raises KeyError: if the record's ``sub_db_id`` has no provider entry.
        """
        sub_db_id = titleMap.get("sub_db_id", "")
        transMap = {target: titleMap.get(source, "")
                    for target, source in self._LEADING_PAIRS}

        provider = self.zt_providermap[sub_db_id]
        transMap["provider"] = provider
        # Default to "" like every other field: a record without these keys
        # used to fail with TypeError (str + None).
        transMap["provider_url"] = provider + "@" + titleMap.get("provider_url", "")
        transMap["provider_id"] = provider + "@" + titleMap.get("rawid", "")
        if batch is None:
            batch = BaseTime().get_beijin_date_strins("%Y%m%d") + "00"
        transMap["batch"] = batch
        transMap["type"] = titleMap.get("source_type", "")
        transMap["medium"] = "2"
        for key in ("country", "language", "is_deprecated"):
            transMap[key] = titleMap.get(key, "")

        # The declared output field is "agents", but only "agent" was ever
        # written, so "agents" always came out blank. Fill it from the same
        # value; the legacy "agent" key is kept for existing consumers.
        transMap["agents"] = transMap["agent"]

        # Guarantee every declared output field exists.
        for field in self.fields:
            transMap.setdefault(field, "")
        return transMap
129
-
130
-
131
if __name__ == "__main__":
    # Manual smoke check: constructing the converter runs its MySQL
    # provider lookup.
    t = TransformPantentTitleToZt()
@@ -1,114 +0,0 @@
1
- # -*- coding:utf-8 -*-
2
- # @Time : 2021/12/19 18:16
3
- # @Author: suhong
4
- # @File : TransformRegulationTitleToZt.py
5
- # @Function : 法律法规a层转智图
6
- from re_common.baselibrary.utils.basetime import BaseTime
7
-
8
- from re_common.facade.mysqlfacade import MysqlUtiles
9
-
10
-
11
class TransformRegulationTitleToZt():
    """Convert a regulation (source_type '8') a-layer record to the zt layout.

    On construction the converter loads the ``sub_db_id -> provider`` mapping
    from the ``a_transform_task`` table; ``transform`` then renames the a-layer
    keys of one record to the zt field names listed in ``self.fields``.
    """

    # Output fields; any of them that ``transform`` does not set is emitted as "".
    _FIELDS = (
        "lngid",
        "rawid",
        "title",
        "title_alternative",
        "creator",
        "creator_en",
        "creator_release",
        "publisher",
        "date",
        "date_impl",
        "description",
        "description_en",
        "legal_status",
        "subject",
        "subject_en",
        "provider_subject",
        "identifier_standard",
        "page",
        "pagecount",
        "agency",
        "contributor",
        "description_type",
        "agents",
        "date_created",
        "rawtype",
        "pub_place",
        "language",
        "country",
        "type",
        "provider",
        "provider_url",
        "provider_id",
        "medium",
        "batch",
        "is_deprecated",
    )

    # (zt field, a-layer key) pairs copied in output order.
    _LEADING_PAIRS = (
        ("lngid", "lngid"),
        ("rawid", "rawid"),
        ("title", "title"),
        ("title_alternative", "title_alt"),
        ("creator", "author"),
        ("creator_en", "author_alt"),
        ("creator_release", "host_organ"),
        ("publisher", "final_court"),
        ("date", "pub_year"),
        ("date_impl", "impl_date"),
        ("description", "abstract"),
        ("description_en", "abstract_alt"),
        ("legal_status", "legal_status"),
        ("subject", "keyword"),
        ("subject_en", "keyword_alt"),
        ("provider_subject", "subject"),
        ("identifier_standard", "pub_no"),
        ("page", "page_info"),
        ("pagecount", "page_cnt"),
        ("agency", "agency"),
        ("contributor", "contributor"),
        ("description_type", "level"),
        ("agents", "agents"),
        ("date_created", "pub_date"),
        ("rawtype", "raw_type"),
        ("pub_place", "pub_place"),
    )

    # zt fields whose value has every "-" removed.
    _HYPHEN_FREE_FIELDS = ("date_impl", "date_created")

    # NOTE(review): security - a root password is committed to source here.
    # It is kept only so the no-argument constructor behaves as before; pass
    # ``db_config`` instead and move these values to secret storage.
    _DEFAULT_DB_CONFIG = {
        "host": "192.168.31.24",
        "user": "root",
        "passwd": "vipdatacenter",
        "db": "data_warehouse_sql",
        "port": "3306",
        # NOTE(review): "chartset" looks like a typo for "charset"; left
        # untouched because the key read by the builder is not visible here.
        "chartset": "utf8mb4",
    }

    def __init__(self, db_config=None):
        """Open the MySQL helper and load the provider mapping.

        :param db_config: optional dict of connection settings handed to
            ``MysqlUtiles``; defaults to the legacy built-in settings.
        """
        self.fields = list(self._FIELDS)
        self.zt_providermap = dict()
        settings = self._DEFAULT_DB_CONFIG if db_config is None else db_config
        self.mysqlutils = MysqlUtiles(None, None, builder="MysqlBuilderForDicts",
                                      dicts=dict(settings))
        self.get_zt_provider()

    def get_zt_provider(self):
        """Fill ``self.zt_providermap`` with ``sub_db_id -> provider`` rows."""
        rows = self.mysqlutils.SelectFromDB(
            "select sub_db_id,provider from a_transform_task where source_type = '8' and out_type = 'zt'")
        # The second element of the result is iterated as the row sequence.
        for row in rows[1]:
            self.zt_providermap[row[0]] = row[1]

    def transform(self, titleMap, batch=None):
        """Return the zt-layout dict for one a-layer regulation record.

        :param titleMap: dict holding the a-layer record.
        :param batch: optional batch stamp to write into ``"batch"``; when
            omitted it is derived from ``BaseTime`` as before. Passing it lets
            a caller stamp a whole run with one value.
        :raises KeyError: if the record's ``sub_db_id`` has no provider entry.
        """
        sub_db_id = titleMap.get("sub_db_id", "")
        transMap = dict()
        for target, source in self._LEADING_PAIRS:
            value = titleMap.get(source, "")
            if target in self._HYPHEN_FREE_FIELDS:
                value = value.replace("-", "")
            transMap[target] = value

        provider = self.zt_providermap[sub_db_id]
        transMap["provider"] = provider
        # Default to "" like every other field: a record without these keys
        # used to fail with TypeError (str + None).
        transMap["provider_url"] = provider + "@" + titleMap.get("provider_url", "")
        transMap["provider_id"] = provider + "@" + titleMap.get("rawid", "")
        if batch is None:
            batch = BaseTime().get_beijin_date_strins("%Y%m%d") + "00"
        transMap["batch"] = batch
        transMap["type"] = titleMap.get("source_type", "")
        transMap["medium"] = "2"
        for key in ("country", "language", "is_deprecated"):
            transMap[key] = titleMap.get(key, "")

        # Guarantee every declared output field exists.
        for field in self.fields:
            transMap.setdefault(field, "")
        return transMap
111
-
112
if __name__ == "__main__":
    # Manual smoke check: constructing the converter runs its MySQL
    # provider lookup.
    t = TransformRegulationTitleToZt()
114
-