myner-chencheng 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,11 @@
1
+ Metadata-Version: 2.4
2
+ Name: myner_chencheng
3
+ Version: 0.1.0
4
+ Summary: A simple NER package for learning
5
+ Author-email: Chen Cheng <your_email@example.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/yourusername/myner
8
+ Requires-Python: >=3.8
9
+ Description-Content-Type: text/markdown
10
+ Requires-Dist: torch>=1.13
11
+ Requires-Dist: transformers>=4.20
@@ -0,0 +1,21 @@
1
[build-system]
# The [project] table below uses an SPDX license string (license = "MIT").
# setuptools only understands that form from 77.0.3 on, so pin the minimum
# version instead of accepting whatever setuptools the build front-end has.
requires = ["setuptools>=77.0.3", "wheel"]
build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "myner_chencheng" # package name (must be unique)
7
+ version = "0.1.0" # package version
8
+ description = "A simple NER package for learning"
9
+ readme = "README.md"
10
+ requires-python = ">=3.8"
11
+ authors = [
12
+ { name = "Chen Cheng", email = "your_email@example.com" }
13
+ ]
14
+ license = "MIT"
15
+ dependencies = [
16
+ "torch>=1.13",
17
+ "transformers>=4.20"
18
+ ]
19
+
20
+ [project.urls]
21
+ Homepage = "https://github.com/yourusername/myner"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,130 @@
1
+ # coding=utf-8
2
+ import sys
3
+ import os
4
+
5
# Resolve the project root directory (named "bj_23AI_KGCode" in the original
# note): two levels above the directory that contains this file.
root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
# Put the project root first on sys.path so that the absolute project imports
# further down (chapter4_code..., lstmAtten_datautils...) can be resolved.
sys.path.insert(0, root_dir)
8
+
9
+
10
+ import torch
11
+ import torch.nn as nn
12
+ import torch.nn.functional as F
13
+ from chapter4_code.BiLSTM_Attention_RE.utils.data_loader import *
14
+ from lstmAtten_datautils.process import relation2id,word2id
15
+ from chapter4_code.BiLSTM_Attention_RE.config import Config
16
# Module-level state, created once at import time and read by BiLSTM_ATT.
# Project configuration object; BiLSTM_ATT reads its hyper-parameters from it.
conf = Config()
# Word-to-id mapping; BiLSTM_ATT uses len(word2id_dict) + 1 as the size of the
# word-embedding table.
word2id_dict=word2id(conf.base_conf)
# NOTE(review): this rebinds the imported function name `relation2id` to the
# function's return value, so the function cannot be called again from this
# module. BiLSTM_ATT uses len(relation2id) as the number of relation tags.
relation2id=relation2id(conf.base_conf)
19
+
20
class BiLSTM_ATT(nn.Module):
    """BiLSTM + attention model for relation classification.

    Every token is represented by its word embedding concatenated with two
    position embeddings (one relative to each entity).  A bidirectional LSTM
    encodes the sequence, an attention layer pools the LSTM states into a
    single sentence vector, and a linear layer maps that vector to one score
    per relation type.

    The vocabulary size and the number of relation tags are read from the
    module-level ``word2id_dict`` and ``relation2id`` objects, not from
    ``conf``.
    """

    def __init__(self, conf):
        """Build the embedding, LSTM, attention, output and dropout layers.

        :param conf: configuration object; this constructor reads ``device``,
            ``embedding_dim``, ``pos_size``, ``pos_dim`` and ``hidden_dim``
        :raises ValueError: if ``conf.hidden_dim`` is not a positive even
            integer.  Each LSTM direction gets ``hidden_dim // 2`` units, so
            an odd value would make the LSTM output one unit narrower than
            the attention weight and the output layer expect.
        """
        super().__init__()
        # Fail early: with an odd hidden_dim the model would be built without
        # complaint and only crash later, inside attention().
        if conf.hidden_dim <= 0 or conf.hidden_dim % 2 != 0:
            raise ValueError(
                f"hidden_dim must be a positive even integer, got {conf.hidden_dim!r}"
            )

        self.device = conf.device
        # The +1 presumably reserves one extra index (e.g. padding/unknown)
        # on top of the vocabulary - confirm against the data pipeline.
        self.vocab_size = len(word2id_dict) + 1
        # Dimension of the word embeddings.
        self.embedding_dim = conf.embedding_dim
        # Number of distinct position ids.
        self.pos_size = conf.pos_size
        # Dimension of each position embedding.
        self.pos_dim = conf.pos_dim
        # Width of the concatenated BiLSTM output (hidden_dim // 2 per direction).
        self.hidden_dim = conf.hidden_dim
        # Number of relation labels.
        self.tag_size = len(relation2id)

        # Word embedding: token id -> vector.
        self.word_embed = nn.Embedding(self.vocab_size, self.embedding_dim)
        # Position embeddings: token position relative to entity 1 / entity 2.
        self.pos1_embed = nn.Embedding(self.pos_size, self.pos_dim)
        self.pos2_embed = nn.Embedding(self.pos_size, self.pos_dim)
        # Bidirectional LSTM over the concatenated embeddings.
        self.lstm = nn.LSTM(
            input_size=self.embedding_dim + self.pos_dim * 2,
            hidden_size=self.hidden_dim // 2,
            bidirectional=True,
            batch_first=True,
        )
        # Attention vector w, shape [hidden_dim, 1].  It is created like every
        # other parameter and follows the module when ``.to(device)`` is
        # called, instead of being pinned to conf.device at construction.
        self.weight = nn.Parameter(torch.randn(self.hidden_dim, 1))
        # Output layer: sentence vector -> relation scores.
        self.out = nn.Linear(self.hidden_dim, self.tag_size)
        # Dropout after the embeddings, the LSTM and the attention pooling.
        self.dropout_embed = nn.Dropout(p=0.3)
        self.dropout_lstm = nn.Dropout(p=0.3)
        self.dropout_atten = nn.Dropout(p=0.5)

    def attention(self, H, mask=None):
        """Pool the LSTM states into one sentence vector with attention.

        Computes ``M = tanh(H)``, ``alpha = softmax(w^T M)``,
        ``r = H alpha^T`` and ``h* = tanh(r)``.

        :param H: LSTM output, shape [batch_size, seq_len, hidden_dim]
        :param mask: optional tensor of shape [batch_size, seq_len]; positions
            that are False/0 (e.g. padding) receive zero attention weight.
            Every row must keep at least one True position, otherwise the
            softmax of that row is NaN.  ``None`` (the default) attends over
            all positions.
        :return: sentence representation, shape [batch_size, hidden_dim]
        """
        M = torch.tanh(H)                                   # [batch, seq_len, hidden_dim]
        scores = torch.matmul(M, self.weight).squeeze(-1)   # [batch, seq_len]
        if mask is not None:
            keep = mask.to(device=scores.device, dtype=torch.bool)
            # -inf becomes exactly 0 after the softmax.
            scores = scores.masked_fill(~keep, float('-inf'))
        alpha = F.softmax(scores, dim=-1)                   # [batch, seq_len]

        # Weighted sum of the states: [batch, hidden_dim, seq_len] x [batch, seq_len, 1].
        r = torch.bmm(H.transpose(1, 2), alpha.unsqueeze(-1)).squeeze(-1)
        return torch.tanh(r)                                # [batch, hidden_dim]

    def forward(self, sentence, pos1, pos2, mask=None):
        """Run the model on a batch.

        :param sentence: token ids, shape [batch_size, seq_len]
        :param pos1: position ids relative to entity 1, shape [batch_size, seq_len]
        :param pos2: position ids relative to entity 2, shape [batch_size, seq_len]
        :param mask: optional [batch_size, seq_len] tensor forwarded to
            :meth:`attention`; False/0 marks positions to ignore
        :return: relation scores, shape [batch_size, tag_size]
        """
        # Concatenate word and position embeddings along the feature axis.
        embeds = torch.cat(
            (self.word_embed(sentence), self.pos1_embed(pos1), self.pos2_embed(pos2)),
            dim=-1,
        )
        embeds = self.dropout_embed(embeds)

        # Contextual encoding: [batch, seq_len, hidden_dim].
        lstm_out, _ = self.lstm(embeds)
        lstm_out = self.dropout_lstm(lstm_out)

        # Attention pooling: [batch, hidden_dim].
        sentence_repr = self.attention(lstm_out, mask)
        sentence_repr = self.dropout_atten(sentence_repr)

        # Relation scores: [batch, tag_size].
        return self.out(sentence_repr)
116
+
117
if __name__ == '__main__':
    # Smoke test: run every training batch through the model and print the
    # input and output shapes.
    loaders = get_all_loader()
    train_loader = loaders["train"]
    test_loader = loaders["test"]  # fetched but not used below

    model = BiLSTM_ATT(conf).to(conf.device)
    for datas, positionE1, positionE2, labels, _, _, _ in train_loader:
        # The model lives on conf.device, so the inputs must be moved there
        # too; otherwise the forward pass fails whenever that is not the CPU.
        datas = datas.to(conf.device)
        positionE1 = positionE1.to(conf.device)
        positionE2 = positionE2.to(conf.device)
        print(f'输入句子形状--->{datas.shape}')
        print(f'实体1位置编码形状--->{positionE1.shape}')
        print(f'实体2位置编码形状--->{positionE2.shape}')
        # Forward pass only; no gradients are needed for a shape check.
        with torch.no_grad():
            output = model(datas, positionE1, positionE2)
        print(f'模型输出形状--->{output.shape}')
@@ -0,0 +1 @@
1
+ # coding: utf-8
@@ -0,0 +1,123 @@
1
+ # coding=utf-8
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+ from chapter4_code.BiLSTM_Attention_RE.utils.data_loader import *
6
+ from lstmAtten_datautils.process import relation2id,word2id
7
+ from chapter4_code.BiLSTM_Attention_RE.config import Config
8
# Module-level state, created once at import time and read by BiLSTM_ATT.
# Project configuration object; BiLSTM_ATT reads its hyper-parameters from it.
conf = Config()
# Word-to-id mapping; BiLSTM_ATT uses len(word2id_dict) + 1 as the size of the
# word-embedding table.
word2id_dict=word2id(conf.base_conf)
# NOTE(review): this rebinds the imported function name `relation2id` to the
# function's return value, so the function cannot be called again from this
# module. BiLSTM_ATT uses len(relation2id) as the number of relation tags.
relation2id=relation2id(conf.base_conf)
11
+
12
class BiLSTM_ATT(nn.Module):
    """BiLSTM + attention model for relation classification.

    Every token is represented by its word embedding concatenated with two
    position embeddings (one relative to each entity).  A bidirectional LSTM
    encodes the sequence, an attention layer pools the LSTM states into a
    single sentence vector, and a linear layer maps that vector to one score
    per relation type.

    The vocabulary size and the number of relation tags are read from the
    module-level ``word2id_dict`` and ``relation2id`` objects, not from
    ``conf``.
    """

    def __init__(self, conf):
        """Build the embedding, LSTM, attention, output and dropout layers.

        :param conf: configuration object; this constructor reads ``device``,
            ``embedding_dim``, ``pos_size``, ``pos_dim`` and ``hidden_dim``
        :raises ValueError: if ``conf.hidden_dim`` is not a positive even
            integer.  Each LSTM direction gets ``hidden_dim // 2`` units, so
            an odd value would make the LSTM output one unit narrower than
            the attention weight and the output layer expect.
        """
        super().__init__()
        # Fail early: with an odd hidden_dim the model would be built without
        # complaint and only crash later, inside attention().
        if conf.hidden_dim <= 0 or conf.hidden_dim % 2 != 0:
            raise ValueError(
                f"hidden_dim must be a positive even integer, got {conf.hidden_dim!r}"
            )

        self.device = conf.device
        # The +1 presumably reserves one extra index (e.g. padding/unknown)
        # on top of the vocabulary - confirm against the data pipeline.
        self.vocab_size = len(word2id_dict) + 1
        # Dimension of the word embeddings.
        self.embedding_dim = conf.embedding_dim
        # Number of distinct position ids.
        self.pos_size = conf.pos_size
        # Dimension of each position embedding.
        self.pos_dim = conf.pos_dim
        # Width of the concatenated BiLSTM output (hidden_dim // 2 per direction).
        self.hidden_dim = conf.hidden_dim
        # Number of relation labels.
        self.tag_size = len(relation2id)

        # Word embedding: token id -> vector.
        self.word_embed = nn.Embedding(self.vocab_size, self.embedding_dim)
        # Position embeddings: token position relative to entity 1 / entity 2.
        self.pos1_embed = nn.Embedding(self.pos_size, self.pos_dim)
        self.pos2_embed = nn.Embedding(self.pos_size, self.pos_dim)
        # Bidirectional LSTM over the concatenated embeddings.
        self.lstm = nn.LSTM(
            input_size=self.embedding_dim + self.pos_dim * 2,
            hidden_size=self.hidden_dim // 2,
            bidirectional=True,
            batch_first=True,
        )
        # Attention vector w, shape [hidden_dim, 1].  It is created like every
        # other parameter and follows the module when ``.to(device)`` is
        # called, instead of being pinned to conf.device at construction.
        self.weight = nn.Parameter(torch.randn(self.hidden_dim, 1))
        # Output layer: sentence vector -> relation scores.
        self.out = nn.Linear(self.hidden_dim, self.tag_size)
        # Dropout after the embeddings, the LSTM and the attention pooling.
        self.dropout_embed = nn.Dropout(p=0.3)
        self.dropout_lstm = nn.Dropout(p=0.3)
        self.dropout_atten = nn.Dropout(p=0.5)

    def attention(self, H, mask=None):
        """Pool the LSTM states into one sentence vector with attention.

        Computes ``M = tanh(H)``, ``alpha = softmax(w^T M)``,
        ``r = H alpha^T`` and ``h* = tanh(r)``.

        :param H: LSTM output, shape [batch_size, seq_len, hidden_dim]
        :param mask: optional tensor of shape [batch_size, seq_len]; positions
            that are False/0 (e.g. padding) receive zero attention weight.
            Every row must keep at least one True position, otherwise the
            softmax of that row is NaN.  ``None`` (the default) attends over
            all positions.
        :return: sentence representation, shape [batch_size, hidden_dim]
        """
        M = torch.tanh(H)                                   # [batch, seq_len, hidden_dim]
        scores = torch.matmul(M, self.weight).squeeze(-1)   # [batch, seq_len]
        if mask is not None:
            keep = mask.to(device=scores.device, dtype=torch.bool)
            # -inf becomes exactly 0 after the softmax.
            scores = scores.masked_fill(~keep, float('-inf'))
        alpha = F.softmax(scores, dim=-1)                   # [batch, seq_len]

        # Weighted sum of the states: [batch, hidden_dim, seq_len] x [batch, seq_len, 1].
        r = torch.bmm(H.transpose(1, 2), alpha.unsqueeze(-1)).squeeze(-1)
        return torch.tanh(r)                                # [batch, hidden_dim]

    def forward(self, sentence, pos1, pos2, mask=None):
        """Run the model on a batch.

        :param sentence: token ids, shape [batch_size, seq_len]
        :param pos1: position ids relative to entity 1, shape [batch_size, seq_len]
        :param pos2: position ids relative to entity 2, shape [batch_size, seq_len]
        :param mask: optional [batch_size, seq_len] tensor forwarded to
            :meth:`attention`; False/0 marks positions to ignore
        :return: relation scores, shape [batch_size, tag_size]
        """
        # Concatenate word and position embeddings along the feature axis.
        embeds = torch.cat(
            (self.word_embed(sentence), self.pos1_embed(pos1), self.pos2_embed(pos2)),
            dim=-1,
        )
        embeds = self.dropout_embed(embeds)

        # Contextual encoding: [batch, seq_len, hidden_dim].
        lstm_out, _ = self.lstm(embeds)
        lstm_out = self.dropout_lstm(lstm_out)

        # Attention pooling: [batch, hidden_dim].
        sentence_repr = self.attention(lstm_out, mask)
        sentence_repr = self.dropout_atten(sentence_repr)

        # Relation scores: [batch, tag_size].
        return self.out(sentence_repr)
109
+
110
if __name__ == '__main__':
    # Smoke test: run every training batch through the model and print the
    # input and output shapes.
    loaders = get_all_loader()
    train_loader = loaders["train"]
    test_loader = loaders["test"]  # fetched but not used below

    model = BiLSTM_ATT(conf).to(conf.device)
    for datas, positionE1, positionE2, labels, _, _, _ in train_loader:
        # The model lives on conf.device, so the inputs must be moved there
        # too; otherwise the forward pass fails whenever that is not the CPU.
        datas = datas.to(conf.device)
        positionE1 = positionE1.to(conf.device)
        positionE2 = positionE2.to(conf.device)
        print(f'输入句子形状--->{datas.shape}')
        print(f'实体1位置编码形状--->{positionE1.shape}')
        print(f'实体2位置编码形状--->{positionE2.shape}')
        # Forward pass only; no gradients are needed for a shape check.
        with torch.no_grad():
            output = model(datas, positionE1, positionE2)
        print(f'模型输出形状--->{output.shape}')
@@ -0,0 +1,11 @@
1
+ Metadata-Version: 2.4
2
+ Name: myner_chencheng
3
+ Version: 0.1.0
4
+ Summary: A simple NER package for learning
5
+ Author-email: Chen Cheng <your_email@example.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/yourusername/myner
8
+ Requires-Python: >=3.8
9
+ Description-Content-Type: text/markdown
10
+ Requires-Dist: torch>=1.13
11
+ Requires-Dist: transformers>=4.20
@@ -0,0 +1,9 @@
1
+ pyproject.toml
2
+ src/model/BiLSTM_Attn.py
3
+ src/model/__init__.py
4
+ src/model/bj_BiLSTM_Attn.py
5
+ src/myner_chencheng.egg-info/PKG-INFO
6
+ src/myner_chencheng.egg-info/SOURCES.txt
7
+ src/myner_chencheng.egg-info/dependency_links.txt
8
+ src/myner_chencheng.egg-info/requires.txt
9
+ src/myner_chencheng.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ torch>=1.13
2
+ transformers>=4.20