scout-ai 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. checksums.yaml +4 -4
  2. data/.vimproject +80 -15
  3. data/README.md +296 -0
  4. data/Rakefile +2 -0
  5. data/VERSION +1 -1
  6. data/doc/Agent.md +279 -0
  7. data/doc/Chat.md +258 -0
  8. data/doc/LLM.md +446 -0
  9. data/doc/Model.md +513 -0
  10. data/doc/RAG.md +129 -0
  11. data/lib/scout/llm/agent/chat.rb +51 -1
  12. data/lib/scout/llm/agent/delegate.rb +39 -0
  13. data/lib/scout/llm/agent/iterate.rb +44 -0
  14. data/lib/scout/llm/agent.rb +42 -21
  15. data/lib/scout/llm/ask.rb +38 -6
  16. data/lib/scout/llm/backends/anthropic.rb +147 -0
  17. data/lib/scout/llm/backends/bedrock.rb +1 -1
  18. data/lib/scout/llm/backends/ollama.rb +23 -29
  19. data/lib/scout/llm/backends/openai.rb +34 -40
  20. data/lib/scout/llm/backends/responses.rb +158 -110
  21. data/lib/scout/llm/chat.rb +250 -94
  22. data/lib/scout/llm/embed.rb +4 -4
  23. data/lib/scout/llm/mcp.rb +28 -0
  24. data/lib/scout/llm/parse.rb +1 -0
  25. data/lib/scout/llm/rag.rb +9 -0
  26. data/lib/scout/llm/tools/call.rb +66 -0
  27. data/lib/scout/llm/tools/knowledge_base.rb +158 -0
  28. data/lib/scout/llm/tools/mcp.rb +59 -0
  29. data/lib/scout/llm/tools/workflow.rb +69 -0
  30. data/lib/scout/llm/tools.rb +58 -143
  31. data/lib/scout-ai.rb +1 -0
  32. data/scout-ai.gemspec +31 -18
  33. data/scout_commands/agent/ask +28 -71
  34. data/scout_commands/documenter +148 -0
  35. data/scout_commands/llm/ask +2 -2
  36. data/scout_commands/llm/server +319 -0
  37. data/share/server/chat.html +138 -0
  38. data/share/server/chat.js +468 -0
  39. data/test/scout/llm/backends/test_anthropic.rb +134 -0
  40. data/test/scout/llm/backends/test_openai.rb +45 -6
  41. data/test/scout/llm/backends/test_responses.rb +124 -0
  42. data/test/scout/llm/test_agent.rb +0 -70
  43. data/test/scout/llm/test_ask.rb +3 -1
  44. data/test/scout/llm/test_chat.rb +43 -1
  45. data/test/scout/llm/test_mcp.rb +29 -0
  46. data/test/scout/llm/tools/test_knowledge_base.rb +22 -0
  47. data/test/scout/llm/tools/test_mcp.rb +11 -0
  48. data/test/scout/llm/tools/test_workflow.rb +39 -0
  49. metadata +56 -17
  50. data/README.rdoc +0 -18
  51. data/python/scout_ai/__pycache__/__init__.cpython-310.pyc +0 -0
  52. data/python/scout_ai/__pycache__/__init__.cpython-311.pyc +0 -0
  53. data/python/scout_ai/__pycache__/huggingface.cpython-310.pyc +0 -0
  54. data/python/scout_ai/__pycache__/huggingface.cpython-311.pyc +0 -0
  55. data/python/scout_ai/__pycache__/util.cpython-310.pyc +0 -0
  56. data/python/scout_ai/__pycache__/util.cpython-311.pyc +0 -0
  57. data/python/scout_ai/atcold/plot_lib.py +0 -141
  58. data/python/scout_ai/atcold/spiral.py +0 -27
  59. data/python/scout_ai/huggingface/train/__pycache__/__init__.cpython-310.pyc +0 -0
  60. data/python/scout_ai/huggingface/train/__pycache__/next_token.cpython-310.pyc +0 -0
  61. data/python/scout_ai/language_model.py +0 -70
  62. /data/{python/scout_ai/atcold/__init__.py → test/scout/llm/tools/test_call.rb} +0 -0
@@ -1,141 +0,0 @@
1
- from matplotlib import pyplot as plt
2
- import numpy as np
3
- import torch
4
- from IPython.display import HTML, display
5
-
6
-
7
- def set_default(figsize=(10, 10), dpi=100):
8
- plt.style.use(['dark_background', 'bmh'])
9
- plt.rc('axes', facecolor='k')
10
- plt.rc('figure', facecolor='k')
11
- plt.rc('figure', figsize=figsize, dpi=dpi)
12
-
13
-
14
- def plot_data(X, y, d=0, auto=False, zoom=1):
15
- X = X.cpu()
16
- y = y.cpu()
17
- plt.scatter(X.numpy()[:, 0], X.numpy()[:, 1], c=y, s=20, cmap=plt.cm.Spectral)
18
- plt.axis('square')
19
- plt.axis(np.array((-1.1, 1.1, -1.1, 1.1)) * zoom)
20
- if auto is True: plt.axis('equal')
21
- plt.axis('off')
22
-
23
- _m, _c = 0, '.15'
24
- plt.axvline(0, ymin=_m, color=_c, lw=1, zorder=0)
25
- plt.axhline(0, xmin=_m, color=_c, lw=1, zorder=0)
26
-
27
-
28
- def plot_model(X, y, model):
29
- model.cpu()
30
- mesh = np.arange(-1.1, 1.1, 0.01)
31
- xx, yy = np.meshgrid(mesh, mesh)
32
- with torch.no_grad():
33
- data = torch.from_numpy(np.vstack((xx.reshape(-1), yy.reshape(-1))).T).float()
34
- Z = model(data).detach()
35
- Z = np.argmax(Z, axis=1).reshape(xx.shape)
36
- plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral, alpha=0.3)
37
- plot_data(X, y)
38
-
39
-
40
- def show_scatterplot(X, colors, title=''):
41
- colors = colors.cpu().numpy()
42
- X = X.cpu().numpy()
43
- plt.figure()
44
- plt.axis('equal')
45
- plt.scatter(X[:, 0], X[:, 1], c=colors, s=30)
46
- # plt.grid(True)
47
- plt.title(title)
48
- plt.axis('off')
49
-
50
-
51
- def plot_bases(bases, width=0.04):
52
- bases = bases.cpu()
53
- bases[2:] -= bases[:2]
54
- plt.arrow(*bases[0], *bases[2], width=width, color=(1,0,0), zorder=10, alpha=1., length_includes_head=True)
55
- plt.arrow(*bases[1], *bases[3], width=width, color=(0,1,0), zorder=10, alpha=1., length_includes_head=True)
56
-
57
-
58
- def show_mat(mat, vect, prod, threshold=-1):
59
- # Subplot grid definition
60
- fig, (ax1, ax2, ax3) = plt.subplots(1, 3, sharex=False, sharey=True,
61
- gridspec_kw={'width_ratios':[5,1,1]})
62
- # Plot matrices
63
- cax1 = ax1.matshow(mat.numpy(), clim=(-1, 1))
64
- ax2.matshow(vect.numpy(), clim=(-1, 1))
65
- cax3 = ax3.matshow(prod.numpy(), clim=(threshold, 1))
66
-
67
- # Set titles
68
- ax1.set_title(f'A: {mat.size(0)} \u00D7 {mat.size(1)}')
69
- ax2.set_title(f'a^(i): {vect.numel()}')
70
- ax3.set_title(f'p: {prod.numel()}')
71
-
72
- # Remove xticks for vectors
73
- ax2.set_xticks(tuple())
74
- ax3.set_xticks(tuple())
75
-
76
- # Plot colourbars
77
- fig.colorbar(cax1, ax=ax2)
78
- fig.colorbar(cax3, ax=ax3)
79
-
80
- # Fix y-axis limits
81
- ax1.set_ylim(bottom=max(len(prod), len(vect)) - 0.5)
82
-
83
-
84
- colors = dict(
85
- aqua='#8dd3c7',
86
- yellow='#ffffb3',
87
- lavender='#bebada',
88
- red='#fb8072',
89
- blue='#80b1d3',
90
- orange='#fdb462',
91
- green='#b3de69',
92
- pink='#fccde5',
93
- grey='#d9d9d9',
94
- violet='#bc80bd',
95
- unk1='#ccebc5',
96
- unk2='#ffed6f',
97
- )
98
-
99
-
100
- def _cstr(s, color='black'):
101
- if s == ' ':
102
- return f'<text style=color:#000;padding-left:10px;background-color:{color}> </text>'
103
- else:
104
- return f'<text style=color:#000;background-color:{color}>{s} </text>'
105
-
106
- # print html
107
- def _print_color(t):
108
- display(HTML(''.join([_cstr(ti, color=ci) for ti, ci in t])))
109
-
110
- # get appropriate color for value
111
- def _get_clr(value):
112
- colors = ('#85c2e1', '#89c4e2', '#95cae5', '#99cce6', '#a1d0e8',
113
- '#b2d9ec', '#baddee', '#c2e1f0', '#eff7fb', '#f9e8e8',
114
- '#f9e8e8', '#f9d4d4', '#f9bdbd', '#f8a8a8', '#f68f8f',
115
- '#f47676', '#f45f5f', '#f34343', '#f33b3b', '#f42e2e')
116
- value = int((value * 100) / 5)
117
- if value == len(colors): value -= 1 # fixing bugs...
118
- return colors[value]
119
-
120
- def _visualise_values(output_values, result_list):
121
- text_colours = []
122
- for i in range(len(output_values)):
123
- text = (result_list[i], _get_clr(output_values[i]))
124
- text_colours.append(text)
125
- _print_color(text_colours)
126
-
127
- def print_colourbar():
128
- color_range = torch.linspace(-2.5, 2.5, 20)
129
- to_print = [(f'{x:.2f}', _get_clr((x+2.5)/5)) for x in color_range]
130
- _print_color(to_print)
131
-
132
-
133
- # Let's only focus on the last time step for now
134
- # First, the cell state (Long term memory)
135
- def plot_state(data, state, b, decoder):
136
- actual_data = decoder(data[b, :, :].numpy())
137
- seq_len = len(actual_data)
138
- seq_len_w_pad = len(state)
139
- for s in range(state.size(2)):
140
- states = torch.sigmoid(state[:, b, s])
141
- _visualise_values(states[seq_len_w_pad - seq_len:], list(actual_data))
@@ -1,27 +0,0 @@
1
- import torch
2
- import math
3
- def spiral_data(N=1000, D=2, C=3):
4
- X = torch.zeros(N * C, D)
5
- y = torch.zeros(N * C, dtype=torch.long)
6
- for c in range(C):
7
- index = 0
8
- t = torch.linspace(0, 1, N)
9
- # When c = 0 and t = 0: start of linspace
10
- # When c = 0 and t = 1: end of linpace
11
- # This inner_var is for the formula inside sin() and cos() like sin(inner_var) and cos(inner_Var)
12
- inner_var = torch.linspace(
13
- # When t = 0
14
- (2 * math.pi / C) * (c),
15
- # When t = 1
16
- (2 * math.pi / C) * (2 + c),
17
- N
18
- ) + torch.randn(N) * 0.2
19
-
20
- for ix in range(N * c, N * (c + 1)):
21
- X[ix] = t[index] * torch.FloatTensor((
22
- math.sin(inner_var[index]), math.cos(inner_var[index])
23
- ))
24
- y[ix] = c
25
- index += 1
26
-
27
- return (X, y)
@@ -1,70 +0,0 @@
1
- def group_texts(examples):
2
- # Concatenate all texts.
3
- concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()}
4
- total_length = len(concatenated_examples[list(examples.keys())[0]])
5
- # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
6
- # customize this part to your needs.
7
- total_length = (total_length // block_size) * block_size
8
- # Split by chunks of max_len.
9
- result = {
10
- k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
11
- for k, t in concatenated_examples.items()
12
- }
13
- result["labels"] = result["input_ids"].copy()
14
- return result
15
-
16
- def whole_word_masking_data_collator(features):
17
- from transformers import default_data_collator
18
- for feature in features:
19
- word_ids = feature.pop("word_ids")
20
-
21
- # Create a map between words and corresponding token indices
22
- mapping = collections.defaultdict(list)
23
- current_word_index = -1
24
- current_word = None
25
- for idx, word_id in enumerate(word_ids):
26
- if word_id is not None:
27
- if word_id != current_word:
28
- current_word = word_id
29
- current_word_index += 1
30
- mapping[current_word_index].append(idx)
31
-
32
- # Randomly mask words
33
- mask = np.random.binomial(1, wwm_probability, (len(mapping),))
34
- input_ids = feature["input_ids"]
35
- labels = feature["labels"]
36
- new_labels = [-100] * len(labels)
37
- for word_id in np.where(mask)[0]:
38
- word_id = word_id.item()
39
- for idx in mapping[word_id]:
40
- new_labels[idx] = labels[idx]
41
- input_ids[idx] = tokenizer.mask_token_id
42
- feature["labels"] = new_labels
43
-
44
- return default_data_collator(features)
45
-
46
- if __name__ == "__main__2":
47
-
48
- from transformers import AutoModelForMaskedLM
49
- from transformers import AutoTokenizer
50
- import torch
51
-
52
- model_checkpoint = "distilbert-base-uncased"
53
- model = AutoModelForMaskedLM.from_pretrained(model_checkpoint)
54
- tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
55
-
56
- text = "This is a great [MASK]."
57
-
58
- inputs = tokenizer(text, return_tensors="pt")
59
- token_logits = model(**inputs).logits
60
- # Find the location of [MASK] and extract its logits
61
- mask_token_index = torch.where(inputs["input_ids"] == tokenizer.mask_token_id)[1]
62
- mask_token_logits = token_logits[0, mask_token_index, :]
63
- # Pick the [MASK] candidates with the highest logits
64
- top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
65
-
66
- for token in top_5_tokens:
67
- print(f"'>>> {text.replace(tokenizer.mask_token, tokenizer.decode([token]))}'")
68
-
69
-
70
-