PyPI - psaiops - Versions diffs - 0.0.2__py3-none-any.whl → 0.0.3__py3-none-any.whl - Mend

psaiops 0.0.2-py3-none-any.whl → 0.0.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of psaiops might be problematic. Click here for more details.

Files changed (11) hide show

psaiops/combine/__init__.py +0 -0
psaiops/compose/contrast/__init__.py +0 -0
psaiops/edit/__init__.py +0 -0
psaiops/score/attention/__init__.py +0 -0
psaiops/score/attention/app.py +142 -0
psaiops/score/attention/lib.py +191 -0
{psaiops-0.0.2.dist-info → psaiops-0.0.3.dist-info}/METADATA +1 -1
psaiops-0.0.3.dist-info/RECORD +15 -0
psaiops/score/app.py +0 -27
psaiops-0.0.2.dist-info/RECORD +0 -10
{psaiops-0.0.2.dist-info → psaiops-0.0.3.dist-info}/WHEEL +0 -0

psaiops/combine/__init__.py ADDED Viewed

File without changes

psaiops/compose/contrast/__init__.py ADDED Viewed

File without changes

psaiops/edit/__init__.py ADDED Viewed

File without changes

psaiops/score/attention/__init__.py ADDED Viewed

File without changes

psaiops/score/attention/app.py ADDED Viewed

@@ -0,0 +1,142 @@
+import gradio
+import torch
+import psaiops.score.attention.lib
+# META #########################################################################
+# title given to the Gradio page
+TITLE = '''Attention Scoring'''
+# Markdown text rendered at the top of the layout
+INTRO = '''Score each token according to the weights of the attention layers.\nThe model is fixed to "openai/gpt-oss-20b" for now.'''
+# custom CSS forwarded to the Gradio blocks, empty by default
+STYLE = ''''''
+# name of the checkpoint loaded by default for the model and the tokenizer
+MODEL = 'openai/gpt-oss-20b'
+# INTRO ########################################################################
+def create_intro_block(intro: str) -> dict:
+    __intro = gradio.Markdown(intro)
+    return {'intro_block': __intro}
+# MODEL ########################################################################
+def create_model_block() -> dict:
+    __model_dd = gradio.Dropdown(label='Model', value='openai/gpt-oss-20b', choices=['openai/gpt-oss-20b'], allow_custom_value=False, multiselect=False, interactive=True) # 'openai/gpt-oss-120b'
+    __layer_sl = gradio.Slider(label='Layer Depth', value=12, minimum=-1, maximum=23, step=1, interactive=True) # info='-1 to average on all layers'
+    __head_sl = gradio.Slider(label='Attention Head', value=-1, minimum=-1, maximum=63, step=1, interactive=True) # info='-1 to average on all heads'
+    __model_dd.change(fn=update_layer_range, inputs=[__layer_sl, __model_dd], outputs=__layer_sl, queue=False, show_progress='hidden')
+    return {
+        'model_block': __model_dd,
+        'layer_block': __layer_sl,
+        'head_block': __head_sl}
+# SAMPLING #####################################################################
+def create_sampling_block() -> dict:
+    __tokens = gradio.Slider(label='Tokens', value=32, minimum=0, maximum=128, step=1, interactive=True)
+    __topk = gradio.Slider(label='Top K', value=4, minimum=1, maximum=8, step=1, interactive=True)
+    __topp = gradio.Slider(label='Top P', value=0.8, minimum=0.0, maximum=1.0, step=0.1, interactive=True)
+    return {
+        'tokens_block': __tokens,
+        'topk_block': __topk,
+        'topp_block': __topp}
+# DISPLAY ######################################################################
+def create_display_block() -> dict:
+    __display = gradio.Radio(label='Display', value='Tokens', choices=['Tokens', 'Indexes'], interactive=True)
+    return {'display_block': __display}
+# INPUTS #######################################################################
+def create_inputs_block() -> dict:
+    __input = gradio.Textbox(label='Prompt', value='', placeholder='A string of tokens to score.', lines=4, show_copy_button=True, interactive=True)
+    return {'input_block': __input}
+# OUTPUTS ######################################################################
+def create_outputs_block() -> dict:
+    __output = gradio.HighlightedText(label='Scores', value='', interactive=False, show_legend=False, show_inline_category=False, combine_adjacent=True)
+    return {'output_block': __output}
+# ACTIONS ######################################################################
+def create_actions_block() -> dict:
+    __process = gradio.Button('Process', variant='primary', size='lg', interactive=True)
+    __position = gradio.Slider(label='Position', value=-1, minimum=-1, maximum=128, step=1, interactive=True) # info='-1 to average on all tokens'
+    return {
+        'process_block': __process,
+        'position_block': __position}
+# STATE ########################################################################
+def create_state() -> dict:
+    return {'attention_state': gradio.State(None), 'token_state': gradio.State(None)}
+# LAYOUT #######################################################################
+def create_layout(intro: str=INTRO) -> dict:
+    __fields = {}
+    __fields.update(create_intro_block(intro=intro))
+    with gradio.Tabs():
+        with gradio.Tab('Score Tokens') as __main_tab:
+            __fields.update({'main_tab': __main_tab})
+            with gradio.Row():
+                with gradio.Column(scale=1):
+                    __fields.update(create_inputs_block())
+                with gradio.Column(scale=1):
+                    __fields.update(create_outputs_block())
+            with gradio.Row():
+                __fields.update(create_actions_block())
+        with gradio.Tab('Settings') as __settings_tab:
+            __fields.update({'settings_tab': __settings_tab})
+            with gradio.Column(scale=1):
+                with gradio.Row():
+                    __fields.update(create_model_block())
+                with gradio.Row():
+                    __fields.update(create_sampling_block())
+                with gradio.Row():
+                    __fields.update(create_display_block())
+    return __fields
+# EVENTS #######################################################################
+def update_layer_range(value: int, model: str) -> dict:
+    return gradio.update(maximum=35, value=min(35, int(value))) if '120b' in model else gradio.update(maximum=23, value=min(23, int(value)))
+def update_position_range(value: int, dimension: int) -> dict:
+    return gradio.update(maximum=dimension - 1, value=min(dimension - 1, value))
+def update_output_value(
+    attention_data: torch.Tensor=None,
+    token_data: torch.Tensor=None,) -> None:
+    """Placeholder: accepts the attention and token data but returns nothing yet."""
+    # NOTE(review): not wired to any event in this module; presumably meant to refresh the scores from the state
+    return
+# APP ##########################################################################
+def create_app(title: str=TITLE, intro: str=INTRO, style: str=STYLE, model: str=MODEL) -> gradio.Blocks:
+    __fields = {}
+    with gradio.Blocks(theme=gradio.themes.Soft(), title=title, css=style) as __app:
+        # init
+        __device = 'cuda' if torch.cuda.is_available() else 'cpu'
+        __model = psaiops.score.attention.lib.get_model(name=model, device=__device)
+        __tokenizer = psaiops.score.attention.lib.get_tokenizer(name=model, device=__device)
+        # create the UI
+        __fields.update(create_layout(intro=intro))
+        # init the state
+        __fields.update(create_state())
+        # fetch the relevant fields
+        __button = __fields['process_block']
+        # wire the input fields
+        __button.click(
+            fn=psaiops.score.attention.lib.score_tokens,
+            inputs=[__model, __tokenizer] + [__fields[__k] for __k in ['input_block', 'tokens_block', 'topk_block', 'topp_block', 'position_block', 'layer_block', 'head_block']] + [__device],
+            outputs=__fields['output_block'],
+            queue=False,
+            show_progress='full')
+        # gradio application
+        return __app
+# MAIN #########################################################################
+if __name__ == '__main__':
+    # build the UI; create_app also loads the model and the tokenizer
+    __app = create_app()
+    # NOTE(review): share=True presumably requests a public link to the app - confirm this is wanted outside of development
+    __app.launch(share=True, debug=True)

psaiops/score/attention/lib.py ADDED Viewed

@@ -0,0 +1,191 @@
+import functools
+import torch
+import transformers
+import deformers.models.openai.gptoss
+# LOAD #########################################################################
+@functools.lru_cache(maxsize=4)
+def get_tokenizer(name: str, device: str='cpu'):
+    return transformers.AutoTokenizer.from_pretrained(
+        name,
+        use_fast=True,
+        dtype='auto',
+        device_map=device)
+@functools.lru_cache(maxsize=2)
+def get_model(name: str, device: str='cpu'):
+    __model = deformers.models.openai.gptoss.GptOssForCausalInference.from_pretrained(
+        name,
+        dtype='auto',
+        device_map=device)
+    # toggle the inference mode (not training)
+    __model.eval()
+    # transformers model
+    return __model
+# PREPROCESS #####################################################################
+def preprocess_token_ids(
+    tokenizer_obj: object,
+    prompt_str: str,
+    device_str: str='cpu'
+) -> dict:
+    # tokenize
+    __inputs = tokenizer_obj(prompt_str, return_tensors='pt')
+    # move to the main device
+    return {__k: __v.to(device_str) for __k, __v in __inputs.items()}
+# GENERATE #######################################################################
+def generate_token_ids(
+    model_obj: object,
+    input_args: dict,
+    token_num: int,
+    topk_num: int = 4,
+    topp_num: float = 0.9,
+) -> torch.Tensor:
+    # generate completion
+    with torch.no_grad():
+        __outputs = model_obj.generate(
+            **input_args,
+            max_new_tokens=token_num,
+            do_sample=(0.0 < topp_num < 1.0) or (topk_num > 0),
+            top_k=topk_num if (topk_num > 0) else None,
+            top_p=topp_num if (0.0 < topp_num < 1.0) else None,
+            return_dict_in_generate=True,
+            output_hidden_states=False,
+            output_attentions=False,
+            output_scores=False,
+            early_stopping=True,
+            use_cache=True)
+    # full sequence
+    return __outputs.sequences # (1, T)
+# COMPUTE ########################################################################
+def compute_attention_weights(
+    model_obj: object,
+    token_obj: torch.Tensor,
+) -> torch.Tensor:
+    # process the full sequence
+    with torch.no_grad():
+        __outputs = model_obj(
+            input_ids=token_obj,
+            output_attentions=True,
+            return_dict=True)
+    # parse the outputs
+    return torch.stack(__outputs.attentions, dim=0)
+# REDUCE #######################################################################
+def reduce_attention_weights(
+    attention_data: torch.Tensor,
+    token_idx: int, # -1 => avg over all tokens
+    layer_idx: int, # -1 => avg over layers
+    head_idx: int, # -1 => avg over heads
+    input_dim: int,
+) -> torch.Tensor:
+    # parse
+    __layer_dim, __batch_dim, __head_dim, __output_dim, __output_dim = tuple(attention_data.shape) # L, B, H, T, T
+    __layer_idx = min(layer_idx, __layer_dim)
+    __head_idx = min(head_idx, __head_dim)
+    __token_idx = min(token_idx, __output_dim - input_dim - 1) # T = I + O
+    # select the relevant data along each axis
+    __layer_slice = slice(None) if (__layer_idx < 0) else slice(__layer_idx, __layer_idx + 1)
+    __sample_slice = slice(None)
+    __head_slice = slice(None) if (__head_idx < 0) else slice(__head_idx, __head_idx + 1)
+    __token_slice = slice(input_dim, __output_dim) if (__token_idx < 0) else slice(input_dim + __token_idx, input_dim + __token_idx + 1)
+    # filter the data
+    __data = attention_data[__layer_slice, __sample_slice, __head_slice, __token_slice, slice(None)]
+    # reduce all the axes but the last
+    return __data.mean(dim=tuple(range(len(__data.shape) - 1)))
+# FORMAT #########################################################################
+def postprocess_attention_scores(
+    attention_data: torch.Tensor, # (T,)
+    input_dim: int,
+    token_idx: int,
+) -> list:
+    __output_dim = int(attention_data.shape[-1])
+    # isolate the scores of the input prompt
+    __input_slice = slice(0, input_dim)
+    # mask the token that were used to compute the scores
+    __token_idx = min(token_idx, __output_dim - input_dim - 1) # T = I + O
+    __output_range = list(range(__output_dim - input_dim)) if (__token_idx < 0) else [__token_idx]
+    __output_mask = torch.BoolTensor([__i in __output_range for __i in range(__output_dim - input_dim)])
+    # normalize the scores
+    __input_scores = attention_data[__input_slice] / (attention_data[__input_slice].sum() + 1e-5)
+    # round to obtain integer labels from 0 to 100
+    __input_scores = torch.round(100.0 * __input_scores, decimals=0).type(torch.int32)
+    # the generated tokens are not scored
+    __output_scores = torch.where(__output_mask, -1, 0).type(torch.int32)
+    # native list of integers
+    return __input_scores.tolist() + __output_scores.tolist() # (I,) + (O,) = (T,)
+# POSTPROCESS ####################################################################
+def postprocess_token_ids(
+    tokenizer_obj: object,
+    token_obj: torch.Tensor,
+) -> list:
+    # remove the batch axis
+    __indices = token_obj.squeeze().tolist()
+    # back to token strings
+    __tokens = tokenizer_obj.convert_ids_to_tokens(__indices)
+    # normalize the tokens
+    return [__t.replace('Ġ', ' ') for __t in __tokens]
+# COMPUTE ########################################################################
+def score_tokens(
+    model_obj: object,
+    tokenizer_obj: object,
+    prompt_str: str,
+    token_num: int=32,
+    topk_num: int = 4,
+    topp_num: float = 0.9,
+    token_idx: int, # -1 => avg over all tokens
+    layer_idx: int,   # -1 => avg over layers
+    head_idx: int,    # -1 => avg over heads
+    device_str: str='cuda',
+) -> list:
+    # dictionary {'input_ids': _, 'attention_mask': _}
+    __inputs = preprocess_token_ids(
+        tokenizer_obj=tokenizer_obj,
+        prompt_str=prompt_str,
+        device_str=device_str)
+    # parse the inputs
+    __input_dim = int(__inputs['input_ids'].shape[-1])
+    # tensor (1, T)
+    __outputs = generate_token_ids(
+        model_obj=model_obj,
+        input_args=__inputs,
+        token_num=token_num,
+        topk_num=topk_num,
+        topp_num=topp_num)
+    # tensor (L, S, H, T, T)
+    __attentions = compute_attention_weights(
+        model_obj=model_obj,
+        token_obj=__outputs)
+    # reduce the layer, sample, head and output token axes => tensor (T,)
+    __scores = reduce_attention_weights(
+        __attentions,
+        token_idx=token_idx,
+        layer_idx=layer_idx,
+        head_idx=head_idx,
+        input_dim=__input_dim)
+    # translate the scores into integer labels
+    __labels = postprocess_attention_scores(
+        __scores,
+        input_dim=__input_dim,
+        token_idx=token_idx)
+    # detokenize the IDs
+    __tokens = postprocess_token_ids(
+        tokenizer_obj=__tokenizer,
+        token_obj=__outputs)
+    # match tokens and labels for the HighlightedText field
+    return list(zip(__tokens, __labels))

{psaiops-0.0.2.dist-info → psaiops-0.0.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: psaiops
-Version: 0.0.2
+Version: 0.0.3
 Summary: Web apps to inspect & engineer NN activations.
 License: .github/LICENSE.md
 Author: apehex

psaiops-0.0.3.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,15 @@
+psaiops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+psaiops/combine/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+psaiops/compose/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+psaiops/compose/contrast/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+psaiops/edit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+psaiops/elements/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+psaiops/elements/data.py,sha256=vGYeMN11uP9gs8rV6aSDffE_TeIX5PmdzWGwUpdGE2Y,906
+psaiops/score/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+psaiops/score/attention/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+psaiops/score/attention/app.py,sha256=FgTyGrklLfWVQICwtXT7mohBXmSMVyv5iRSgNC64Z-0,6549
+psaiops/score/attention/lib.py,sha256=UQObfalIAenLdg3qZw5l003fenvB5RLeav4G-8H3RHs,6925
+psaiops/steer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+psaiops-0.0.3.dist-info/METADATA,sha256=aof38JNXN2bi0cG31ba1JCLfrB6onSCd3R-econzaL0,1221
+psaiops-0.0.3.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
+psaiops-0.0.3.dist-info/RECORD,,

psaiops/score/app.py DELETED Viewed

@@ -1,27 +0,0 @@
-import gradio
-# META #########################################################################
-TITLE = '''Token Scoring'''
-INTRO = '''Score each input / output token according to a given metric.'''
-STYLE = ''''''
-# MODEL ########################################################################
-def create_model_tab() -> None:
-    pass
-# ROOT #########################################################################
-def create_root_block(title: str=TITLE, intro: str=INTRO, style: str=STYLE) -> gradio.Block:
-    with gradio.Blocks(theme=gradio.themes.Soft(), title=title, css=style) as __app:
-        with gradio.Row():
-            with gradio.Column(scale=1):
-                gradio.Markdown(intro)
-        return __app
-# MAIN #########################################################################
-if __name__ == '__main__':
-    __app = create_root_block()
-    __app.launch(share=True, debug=True)

psaiops-0.0.2.dist-info/RECORD DELETED Viewed

@@ -1,10 +0,0 @@
-psaiops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-psaiops/compose/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-psaiops/elements/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-psaiops/elements/data.py,sha256=vGYeMN11uP9gs8rV6aSDffE_TeIX5PmdzWGwUpdGE2Y,906
-psaiops/score/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-psaiops/score/app.py,sha256=7HfVEFdzJ1IarJ7A4FaWR8GeeGw2tttG993Qn-gzBmY,910
-psaiops/steer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-psaiops-0.0.2.dist-info/METADATA,sha256=Xu81-r81hohISmebG13Cftrr5tyETxei_Jar8ZxWCZM,1221
-psaiops-0.0.2.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
-psaiops-0.0.2.dist-info/RECORD,,

{psaiops-0.0.2.dist-info → psaiops-0.0.3.dist-info}/WHEEL RENAMED Viewed

File without changes

psaiops 0.0.2__py3-none-any.whl → 0.0.3__py3-none-any.whl

Potentially problematic release.

psaiops 0.0.2-py3-none-any.whl → 0.0.3-py3-none-any.whl