psaiops 0.0.13__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,13 +4,15 @@ import gradio
4
4
  import torch
5
5
  import torch.cuda
6
6
 
7
+ import psaiops.common.model
8
+ import psaiops.common.tokenizer
7
9
  import psaiops.score.attention.lib
8
10
 
9
11
  # META #########################################################################
10
12
 
11
- TITLE = '''Attention Scoring'''
12
- INTRO = '''Score each token according to the weights of the attention layers.\nThe model is fixed to "openai/gpt-oss-20b" for now.'''
13
13
  STYLE = '''.white-text span { color: white; }'''
14
+ TITLE = '''Attention Scoring'''
15
+ INTRO = '''Score each token according to the weights of the attention layers.\nUnder construction, only "openai/gpt-oss-20b" is available for now.'''
14
16
 
15
17
  MODEL = 'openai/gpt-oss-20b'
16
18
 
@@ -18,26 +20,20 @@ MODEL = 'openai/gpt-oss-20b'
18
20
 
19
21
  def create_color_map() -> dict:
20
22
  return {
21
- '-1': '#00ff00',
23
+ '-1': '#004444',
22
24
  **{str(__i): '#{:02x}0000'.format(int(2.55 * __i)) for __i in range(101)}}
23
25
 
24
26
  # INTRO ########################################################################
25
27
 
26
28
  def create_intro_block(intro: str) -> dict:
27
- __intro = gradio.Markdown(intro)
29
+ __intro = gradio.Markdown(intro, line_breaks=True)
28
30
  return {'intro_block': __intro}
29
31
 
30
32
  # MODEL ########################################################################
31
33
 
32
34
  def create_model_block() -> dict:
33
- __model_dd = gradio.Dropdown(label='Model', value='openai/gpt-oss-20b', choices=['openai/gpt-oss-20b'], scale=1, allow_custom_value=False, multiselect=False, interactive=True) # 'openai/gpt-oss-120b'
34
- __layer_sl = gradio.Slider(label='Layer Depth', value=12, minimum=-1, maximum=23, step=1, scale=1, interactive=True) # info='-1 to average on all layers'
35
- __head_sl = gradio.Slider(label='Attention Head', value=-1, minimum=-1, maximum=63, step=1, scale=1, interactive=True) # info='-1 to average on all heads'
36
- __model_dd.change(fn=update_layer_range, inputs=[__layer_sl, __model_dd], outputs=__layer_sl, queue=False, show_progress='hidden')
37
- return {
38
- 'model_block': __model_dd,
39
- 'layer_block': __layer_sl,
40
- 'head_block': __head_sl}
35
+ __model = gradio.Dropdown(label='Model', value='openai/gpt-oss-20b', choices=['openai/gpt-oss-20b'], scale=1, allow_custom_value=False, multiselect=False, interactive=True) # 'openai/gpt-oss-120b'
36
+ return {'model_block': __model,}
41
37
 
42
38
  # SAMPLING #####################################################################
43
39
 
@@ -53,14 +49,14 @@ def create_sampling_block() -> dict:
53
49
  # TARGET #######################################################################
54
50
 
55
51
  def create_target_block() -> dict:
56
- __target = gradio.Radio(label='Score', value='Inputs', choices=['Inputs', 'Outputs', 'Both'], scale=1, interactive=True)
52
+ __target = gradio.Radio(label='Score', value='Inputs', choices=['Inputs', 'Everything'], scale=1, interactive=True)
57
53
  return {'target_block': __target}
58
54
 
59
55
  # DISPLAY ######################################################################
60
56
 
61
- def create_display_block() -> dict:
62
- __display = gradio.Radio(label='Display', value='Tokens', choices=['Tokens', 'Indexes'], scale=1, interactive=True)
63
- return {'display_block': __display}
57
+ # def create_display_block() -> dict:
58
+ # __display = gradio.Radio(label='Display', value='Tokens', choices=['Tokens', 'Indexes'], scale=1, interactive=True)
59
+ # return {'display_block': __display}
64
60
 
65
61
  # INPUTS #######################################################################
66
62
 
@@ -71,17 +67,25 @@ def create_inputs_block() -> dict:
71
67
  # OUTPUTS ######################################################################
72
68
 
73
69
  def create_outputs_block() -> dict:
74
- __output = gradio.HighlightedText(label='Scores', value='', scale=1, interactive=False, show_legend=False, show_inline_category=False, combine_adjacent=True, color_map=create_color_map(), elem_classes='white-text')
70
+ __output = gradio.HighlightedText(label='Scores', value='', scale=1, interactive=False, show_legend=False, show_inline_category=False, combine_adjacent=False, color_map=create_color_map(), elem_classes='white-text')
75
71
  return {'output_block': __output}
76
72
 
73
+ # SELECT #######################################################################
74
+
75
+ def create_selection_block() -> dict:
76
+ __position = gradio.Slider(label='Token Position', value=-1, minimum=-1, maximum=15, step=1, scale=1, interactive=True) # info='-1 to average on all tokens'
77
+ __layer = gradio.Slider(label='Layer Depth', value=12, minimum=-1, maximum=23, step=1, scale=1, interactive=True) # info='-1 to average on all layers'
78
+ __head = gradio.Slider(label='Attention Head', value=-1, minimum=-1, maximum=63, step=1, scale=1, interactive=True) # info='-1 to average on all heads'
79
+ return {
80
+ 'position_block': __position,
81
+ 'layer_block': __layer,
82
+ 'head_block': __head,}
83
+
77
84
  # ACTIONS ######################################################################
78
85
 
79
86
  def create_actions_block() -> dict:
80
87
  __process = gradio.Button('Process', variant='primary', size='lg', scale=1, interactive=True)
81
- __position = gradio.Slider(label='Position', value=-1, minimum=-1, maximum=128, step=1, scale=1, interactive=True) # info='-1 to average on all tokens'
82
- return {
83
- 'process_block': __process,
84
- 'position_block': __position}
88
+ return {'process_block': __process,}
85
89
 
86
90
  # STATE ########################################################################
87
91
 
@@ -102,6 +106,8 @@ def create_layout(intro: str=INTRO) -> dict:
102
106
  with gradio.Row(equal_height=True):
103
107
  __fields.update(create_inputs_block())
104
108
  __fields.update(create_outputs_block())
109
+ with gradio.Row(equal_height=True):
110
+ __fields.update(create_selection_block())
105
111
  with gradio.Row(equal_height=True):
106
112
  __fields.update(create_actions_block())
107
113
  with gradio.Tab('Settings') as __settings_tab:
@@ -113,7 +119,7 @@ def create_layout(intro: str=INTRO) -> dict:
113
119
  __fields.update(create_sampling_block())
114
120
  with gradio.Row(equal_height=True):
115
121
  __fields.update(create_target_block())
116
- __fields.update(create_display_block())
122
+ # __fields.update(create_display_block())
117
123
  return __fields
118
124
 
119
125
  # EVENTS #######################################################################
@@ -121,57 +127,77 @@ def create_layout(intro: str=INTRO) -> dict:
121
127
  def update_layer_range(value: float, model: str) -> dict:
122
128
  return gradio.update(maximum=35, value=min(35, int(value))) if '120b' in model else gradio.update(maximum=23, value=min(23, int(value)))
123
129
 
124
- def update_position_range(value: float, tokens: list) -> dict:
125
- return gradio.update(maximum=len(tokens) - 1, value=min(len(tokens) - 1, int(value)))
130
+ def update_position_range(value: float, tokens: float) -> dict:
131
+ return gradio.update(maximum=int(tokens) - 1, value=min(int(tokens) - 1, int(value)))
126
132
 
127
133
  def update_computation_state(
128
134
  token_num: float,
129
135
  topk_num: float,
130
136
  topp_num: float,
137
+ token_idx: float,
138
+ layer_idx: float,
139
+ head_idx: float,
131
140
  prompt_str: str,
132
141
  device_str: str,
133
142
  model_obj: object,
134
143
  tokenizer_obj: object,
135
144
  ) -> tuple:
136
145
  # sanitize the inputs
137
- __limit = max(1, min(128, int(token_num)))
138
- __topk = max(1, min(128, int(token_num)))
139
- __topp = max(0.0, min(1.0, float(token_num)))
140
- __prompt = prompt_str.strip()
141
- __device = device_str if (device_str in ['cpu', 'cuda']) else 'cpu'
146
+ __token_num = max(1, min(128, int(token_num)))
147
+ __topk_num = max(1, min(8, int(topk_num)))
148
+ __topp_num = max(0.0, min(1.0, float(topp_num)))
149
+ __token_idx = max(-1, min(__token_num, int(token_idx)))
150
+ __layer_idx = max(-1, int(layer_idx))
151
+ __head_idx = max(-1, int(head_idx))
152
+ __prompt_str = prompt_str.strip()
153
+ __device_str = device_str if (device_str in ['cpu', 'cuda']) else 'cpu'
154
+ # exit if some values are missing
155
+ if (not __prompt_str) or (model_obj is None) or (tokenizer_obj is None):
156
+ return ([], [], [], torch.empty(0))
142
157
  # handle all exceptions at once
143
158
  try:
144
159
  # dictionary {'input_ids': _, 'attention_mask': _}
145
- __inputs = psaiops.score.attention.lib.preprocess_token_ids(
160
+ __input_data = psaiops.common.tokenizer.preprocess_token_ids(
146
161
  tokenizer_obj=tokenizer_obj,
147
- prompt_str=__prompt,
148
- device_str=__device)
162
+ prompt_str=__prompt_str,
163
+ device_str=__device_str)
149
164
  # parse the inputs
150
- __input_dim = int(__inputs['input_ids'].shape[-1])
165
+ __input_dim = int(__input_data['input_ids'].shape[-1])
151
166
  # tensor (1, T)
152
- __outputs = psaiops.score.attention.lib.generate_token_ids(
167
+ __output_data = psaiops.common.model.generate_token_ids(
153
168
  model_obj=model_obj,
154
- input_args=__inputs,
155
- token_num=__limit,
156
- topk_num=__topk,
157
- topp_num=__topp)
169
+ input_args=__input_data,
170
+ token_num=__token_num,
171
+ topk_num=__topk_num,
172
+ topp_num=__topp_num)
158
173
  # tensor (L, S, H, T, T)
159
- __attentions = psaiops.score.attention.lib.compute_attention_weights(
174
+ __attention_data = psaiops.score.attention.lib.compute_attention_weights(
160
175
  model_obj=model_obj,
161
- token_obj=__outputs)
176
+ token_obj=__output_data)
177
+ # reduce the layer, sample, head and output token axes => tensor (T,)
178
+ __score_data = psaiops.score.attention.lib.reduce_attention_weights(
179
+ attention_data=__attention_data,
180
+ token_idx=__token_idx,
181
+ layer_idx=__layer_idx,
182
+ head_idx=__head_idx,
183
+ input_dim=__input_dim)
184
+ # translate the scores into integer labels
185
+ __labels = psaiops.score.attention.lib.postprocess_attention_scores(
186
+ attention_data=__score_data,
187
+ input_dim=__input_dim,
188
+ token_idx=__token_idx)
162
189
  # detokenize the IDs
163
- __tokens = psaiops.score.attention.lib.postprocess_token_ids(
190
+ __tokens = psaiops.common.tokenizer.postprocess_token_ids(
164
191
  tokenizer_obj=tokenizer_obj,
165
- token_obj=__outputs)
166
- # update each component => (input, output, attention) states
192
+ token_obj=__output_data)
193
+ # update each component => (input, output, attention, highlight) states
167
194
  return (
168
- gradio.update(value=__tokens[:__input_dim]),
169
- gradio.update(value=__tokens[__input_dim:]),
170
- gradio.update(value=__attentions),)
195
+ list(zip(__tokens, __labels)),
196
+ __tokens[:__input_dim],
197
+ __tokens[__input_dim:],
198
+ __attention_data,)
171
199
  except:
172
200
  raise Exception('Attention generation aborted with an error.')
173
- finally:
174
- return (gradio.update(), gradio.update(), gradio.update())
175
201
 
176
202
  def update_text_highlight(
177
203
  token_idx: float,
@@ -180,15 +206,16 @@ def update_text_highlight(
180
206
  input_data: list,
181
207
  output_data: list,
182
208
  attention_data: torch.Tensor,
183
- ) -> dict:
209
+ ) -> list:
184
210
  # sanitize the inputs
185
211
  __input_data = input_data or []
186
212
  __output_data = output_data or []
187
- __attention_data = attention_data or torch.empty(0)
213
+ __attention_data = torch.empty(0) if (attention_data is None) else attention_data
188
214
  __input_dim = len(__input_data)
189
- __token_idx = max(0, min(__input_dim, int(token_idx)))
190
- __layer_idx = max(0, int(layer_idx))
191
- __head_idx = max(0, int(head_idx))
215
+ __output_dim = len(__output_data)
216
+ __token_idx = max(-1, min(__output_dim, int(token_idx)))
217
+ __layer_idx = max(-1, int(layer_idx))
218
+ __head_idx = max(-1, int(head_idx))
192
219
  # exit if the data has not yet been computed
193
220
  if (not __input_data) or (not __output_data) or (attention_data is None) or (len(attention_data) == 0):
194
221
  return gradio.update()
@@ -209,11 +236,9 @@ def update_text_highlight(
209
236
  input_dim=__input_dim,
210
237
  token_idx=__token_idx)
211
238
  # update the component with [(token, label), ...]
212
- return gradio.update(value=list(zip(__tokens, __labels)))
239
+ return list(zip(__tokens, __labels))
213
240
  except:
214
241
  raise Exception('Attention reduction aborted with an error.')
215
- finally:
216
- return gradio.update()
217
242
 
218
243
  # APP ##########################################################################
219
244
 
@@ -222,40 +247,49 @@ def create_app(title: str=TITLE, intro: str=INTRO, style: str=STYLE, model: str=
222
247
  with gradio.Blocks(theme=gradio.themes.Soft(), title=title, css=style) as __app:
223
248
  # load the model
224
249
  __device = 'cuda' if torch.cuda.is_available() else 'cpu'
225
- __model = psaiops.score.attention.lib.get_model(name=model, device=__device)
226
- __tokenizer = psaiops.score.attention.lib.get_tokenizer(name=model, device=__device)
250
+ __model = psaiops.common.model.get_model(name=model, device=__device)
251
+ __tokenizer = psaiops.common.tokenizer.get_tokenizer(name=model, device=__device)
227
252
  # adapt the computing function
228
253
  __compute = functools.partial(update_computation_state, model_obj=__model, tokenizer_obj=__tokenizer, device_str=__device)
229
254
  # create the UI
230
255
  __fields.update(create_layout(intro=intro))
231
256
  # init the state
232
257
  __fields.update(create_state())
233
- # fetch the relevant fields
234
- __button_block, __position_block, __output_block = (__fields['process_block'], __fields['position_block'], __fields['output_block'])
235
- __output_state, __attention_state = (__fields['output_state'], __fields['attention_state'])
236
258
  # wire the input fields
237
- __button_block.click(
259
+ __fields['tokens_block'].change(
260
+ fn=update_position_range,
261
+ inputs=[__fields[__k] for __k in ['position_block', 'tokens_block']],
262
+ outputs=__fields['position_block'],
263
+ queue=False,
264
+ show_progress='hidden')
265
+ __fields['model_block'].change(
266
+ fn=update_layer_range,
267
+ inputs=[__fields[__k] for __k in ['layer_block', 'model_block']],
268
+ outputs=__fields['layer_block'],
269
+ queue=False,
270
+ show_progress='hidden')
271
+ __fields['process_block'].click(
238
272
  fn=__compute,
239
- inputs=[__fields[__k] for __k in ['tokens_block', 'topk_block', 'topp_block', 'input_block']],
240
- outputs=[__fields[__k] for __k in ['input_state', 'output_state', 'attention_state']],
273
+ inputs=[__fields[__k] for __k in ['tokens_block', 'topk_block', 'topp_block', 'position_block', 'layer_block', 'head_block', 'input_block']],
274
+ outputs=[__fields[__k] for __k in ['output_block', 'input_state', 'output_state', 'attention_state']],
241
275
  queue=False,
242
276
  show_progress='full')
243
- __output_state.change(
244
- fn=update_position_range,
245
- inputs=[__position_block, __output],
246
- outputs=__position_block,
277
+ __fields['position_block'].change(
278
+ fn=update_text_highlight,
279
+ inputs=[__fields[__k] for __k in ['position_block', 'layer_block', 'head_block', 'input_state', 'output_state', 'attention_state']],
280
+ outputs=__fields['output_block'],
247
281
  queue=False,
248
282
  show_progress='hidden')
249
- __attention_state.change(
283
+ __fields['layer_block'].change(
250
284
  fn=update_text_highlight,
251
285
  inputs=[__fields[__k] for __k in ['position_block', 'layer_block', 'head_block', 'input_state', 'output_state', 'attention_state']],
252
- outputs=__output_block,
286
+ outputs=__fields['output_block'],
253
287
  queue=False,
254
288
  show_progress='hidden')
255
- __position_block.change(
289
+ __fields['head_block'].change(
256
290
  fn=update_text_highlight,
257
291
  inputs=[__fields[__k] for __k in ['position_block', 'layer_block', 'head_block', 'input_state', 'output_state', 'attention_state']],
258
- outputs=__output_block,
292
+ outputs=__fields['output_block'],
259
293
  queue=False,
260
294
  show_progress='hidden')
261
295
  # gradio application
@@ -1,69 +1,7 @@
1
- import functools
2
-
3
1
  import torch
4
- import transformers
5
-
6
- import deformers.models.openai.gptoss
7
-
8
- # LOAD #########################################################################
9
-
10
- @functools.lru_cache(maxsize=4)
11
- def get_tokenizer(name: str, device: str='cpu'):
12
- return transformers.AutoTokenizer.from_pretrained(
13
- name,
14
- use_fast=True,
15
- dtype='auto',
16
- device_map=device)
17
-
18
- @functools.lru_cache(maxsize=2)
19
- def get_model(name: str, device: str='cpu'):
20
- __model = deformers.models.openai.gptoss.GptOssForCausalInference.from_pretrained(
21
- name,
22
- dtype='auto',
23
- device_map=device)
24
- # toggle the inference mode (not training)
25
- __model.eval()
26
- # transformers model
27
- return __model
28
2
 
29
- # PREPROCESS #####################################################################
30
-
31
- @functools.lru_cache(maxsize=4)
32
- def preprocess_token_ids(
33
- tokenizer_obj: object,
34
- prompt_str: str,
35
- device_str: str='cpu'
36
- ) -> dict:
37
- # tokenize
38
- __inputs = tokenizer_obj(prompt_str, return_tensors='pt')
39
- # move to the main device
40
- return {__k: __v.to(device_str) for __k, __v in __inputs.items()}
41
-
42
- # GENERATE #######################################################################
43
-
44
- def generate_token_ids(
45
- model_obj: object,
46
- input_args: dict,
47
- token_num: int,
48
- topk_num: int = 4,
49
- topp_num: float = 0.9,
50
- ) -> torch.Tensor:
51
- # generate completion
52
- with torch.no_grad():
53
- __outputs = model_obj.generate(
54
- **input_args,
55
- max_new_tokens=token_num,
56
- do_sample=(0.0 < topp_num < 1.0) or (topk_num > 0),
57
- top_k=topk_num if (topk_num > 0) else None,
58
- top_p=topp_num if (0.0 < topp_num < 1.0) else None,
59
- return_dict_in_generate=True,
60
- output_hidden_states=False,
61
- output_attentions=False,
62
- output_scores=False,
63
- # early_stopping=True,
64
- use_cache=True)
65
- # full sequence
66
- return __outputs.sequences # (1, T)
3
+ import psaiops.common.model
4
+ import psaiops.common.tokenizer
67
5
 
68
6
  # COMPUTE ########################################################################
69
7
 
@@ -91,14 +29,14 @@ def reduce_attention_weights(
91
29
  ) -> torch.Tensor:
92
30
  # parse
93
31
  __layer_dim, __batch_dim, __head_dim, __output_dim, __output_dim = tuple(attention_data.shape) # L, B, H, T, T
94
- __layer_idx = min(layer_idx, __layer_dim)
95
- __head_idx = min(head_idx, __head_dim)
32
+ __layer_idx = min(layer_idx, __layer_dim - 1)
33
+ __head_idx = min(head_idx, __head_dim - 1)
96
34
  __token_idx = min(token_idx, __output_dim - input_dim - 1) # T = I + O
97
35
  # select the relevant data along each axis
98
36
  __layer_slice = slice(None) if (__layer_idx < 0) else slice(__layer_idx, __layer_idx + 1)
99
37
  __sample_slice = slice(None)
100
38
  __head_slice = slice(None) if (__head_idx < 0) else slice(__head_idx, __head_idx + 1)
101
- __token_slice = slice(input_dim, __output_dim) if (__token_idx < 0) else slice(input_dim + __token_idx, input_dim + __token_idx + 1)
39
+ __token_slice = slice(input_dim - 1, __output_dim) if (__token_idx < 0) else slice(input_dim + __token_idx - 1, input_dim + __token_idx)
102
40
  # filter the data
103
41
  __data = attention_data[__layer_slice, __sample_slice, __head_slice, __token_slice, slice(None)]
104
42
  # reduce all the axes but the last
@@ -119,7 +57,7 @@ def postprocess_attention_scores(
119
57
  __output_range = list(range(__output_dim - input_dim)) if (__token_idx < 0) else [__token_idx]
120
58
  __output_mask = torch.BoolTensor([__i in __output_range for __i in range(__output_dim - input_dim)])
121
59
  # normalize the scores
122
- __input_scores = attention_data[__input_slice] / (attention_data[__input_slice].sum() + 1e-5)
60
+ __input_scores = attention_data[__input_slice] / (attention_data[__input_slice].max() + 1e-5)
123
61
  # round to obtain integer labels from 0 to 100
124
62
  __input_scores = torch.round(100.0 * __input_scores, decimals=0).type(torch.int32)
125
63
  # the generated tokens are not scored
@@ -127,19 +65,6 @@ def postprocess_attention_scores(
127
65
  # native list of serialized integers
128
66
  return [str(__i) for __i in __input_scores.tolist() + __output_scores.tolist()] # (I,) + (O,) = (T,)
129
67
 
130
- # POSTPROCESS ####################################################################
131
-
132
- def postprocess_token_ids(
133
- tokenizer_obj: object,
134
- token_obj: torch.Tensor,
135
- ) -> list:
136
- # remove the batch axis
137
- __indices = token_obj.squeeze().tolist()
138
- # back to token strings
139
- __tokens = tokenizer_obj.convert_ids_to_tokens(__indices)
140
- # normalize the tokens
141
- return [__t.replace(chr(0x0120), ' ').replace(chr(0x010a), '\n') for __t in __tokens]
142
-
143
68
  # COMPUTE ########################################################################
144
69
 
145
70
  def score_tokens(
@@ -155,14 +80,14 @@ def score_tokens(
155
80
  tokenizer_obj: object,
156
81
  ) -> list:
157
82
  # dictionary {'input_ids': _, 'attention_mask': _}
158
- __inputs = preprocess_token_ids(
83
+ __inputs = psaiops.common.tokenizer.preprocess_token_ids(
159
84
  tokenizer_obj=tokenizer_obj,
160
85
  prompt_str=prompt_str,
161
86
  device_str=device_str)
162
87
  # parse the inputs
163
88
  __input_dim = int(__inputs['input_ids'].shape[-1])
164
89
  # tensor (1, T)
165
- __outputs = generate_token_ids(
90
+ __outputs = psaiops.common.tokenizer.model.generate_token_ids(
166
91
  model_obj=model_obj,
167
92
  input_args=__inputs,
168
93
  token_num=token_num,
@@ -185,7 +110,7 @@ def score_tokens(
185
110
  input_dim=__input_dim,
186
111
  token_idx=token_idx)
187
112
  # detokenize the IDs
188
- __tokens = postprocess_token_ids(
113
+ __tokens = psaiops.common.tokenizer.postprocess_token_ids(
189
114
  tokenizer_obj=tokenizer_obj,
190
115
  token_obj=__outputs)
191
116
  # match tokens and labels for the HighlightedText field
File without changes