scout-ai 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.vimproject +80 -15
- data/README.md +296 -0
- data/Rakefile +2 -0
- data/VERSION +1 -1
- data/doc/Agent.md +279 -0
- data/doc/Chat.md +258 -0
- data/doc/LLM.md +446 -0
- data/doc/Model.md +513 -0
- data/doc/RAG.md +129 -0
- data/lib/scout/llm/agent/chat.rb +51 -1
- data/lib/scout/llm/agent/delegate.rb +39 -0
- data/lib/scout/llm/agent/iterate.rb +44 -0
- data/lib/scout/llm/agent.rb +42 -21
- data/lib/scout/llm/ask.rb +38 -6
- data/lib/scout/llm/backends/anthropic.rb +147 -0
- data/lib/scout/llm/backends/bedrock.rb +1 -1
- data/lib/scout/llm/backends/ollama.rb +23 -29
- data/lib/scout/llm/backends/openai.rb +34 -40
- data/lib/scout/llm/backends/responses.rb +158 -110
- data/lib/scout/llm/chat.rb +250 -94
- data/lib/scout/llm/embed.rb +4 -4
- data/lib/scout/llm/mcp.rb +28 -0
- data/lib/scout/llm/parse.rb +1 -0
- data/lib/scout/llm/rag.rb +9 -0
- data/lib/scout/llm/tools/call.rb +66 -0
- data/lib/scout/llm/tools/knowledge_base.rb +158 -0
- data/lib/scout/llm/tools/mcp.rb +59 -0
- data/lib/scout/llm/tools/workflow.rb +69 -0
- data/lib/scout/llm/tools.rb +58 -143
- data/lib/scout-ai.rb +1 -0
- data/scout-ai.gemspec +31 -18
- data/scout_commands/agent/ask +28 -71
- data/scout_commands/documenter +148 -0
- data/scout_commands/llm/ask +2 -2
- data/scout_commands/llm/server +319 -0
- data/share/server/chat.html +138 -0
- data/share/server/chat.js +468 -0
- data/test/scout/llm/backends/test_anthropic.rb +134 -0
- data/test/scout/llm/backends/test_openai.rb +45 -6
- data/test/scout/llm/backends/test_responses.rb +124 -0
- data/test/scout/llm/test_agent.rb +0 -70
- data/test/scout/llm/test_ask.rb +3 -1
- data/test/scout/llm/test_chat.rb +43 -1
- data/test/scout/llm/test_mcp.rb +29 -0
- data/test/scout/llm/tools/test_knowledge_base.rb +22 -0
- data/test/scout/llm/tools/test_mcp.rb +11 -0
- data/test/scout/llm/tools/test_workflow.rb +39 -0
- metadata +56 -17
- data/README.rdoc +0 -18
- data/python/scout_ai/__pycache__/__init__.cpython-310.pyc +0 -0
- data/python/scout_ai/__pycache__/__init__.cpython-311.pyc +0 -0
- data/python/scout_ai/__pycache__/huggingface.cpython-310.pyc +0 -0
- data/python/scout_ai/__pycache__/huggingface.cpython-311.pyc +0 -0
- data/python/scout_ai/__pycache__/util.cpython-310.pyc +0 -0
- data/python/scout_ai/__pycache__/util.cpython-311.pyc +0 -0
- data/python/scout_ai/atcold/plot_lib.py +0 -141
- data/python/scout_ai/atcold/spiral.py +0 -27
- data/python/scout_ai/huggingface/train/__pycache__/__init__.cpython-310.pyc +0 -0
- data/python/scout_ai/huggingface/train/__pycache__/next_token.cpython-310.pyc +0 -0
- data/python/scout_ai/language_model.py +0 -70
- /data/{python/scout_ai/atcold/__init__.py → test/scout/llm/tools/test_call.rb} +0 -0
@@ -1,141 +0,0 @@
|
|
1
|
-
from matplotlib import pyplot as plt
|
2
|
-
import numpy as np
|
3
|
-
import torch
|
4
|
-
from IPython.display import HTML, display
|
5
|
-
|
6
|
-
|
7
|
-
def set_default(figsize=(10, 10), dpi=100):
|
8
|
-
plt.style.use(['dark_background', 'bmh'])
|
9
|
-
plt.rc('axes', facecolor='k')
|
10
|
-
plt.rc('figure', facecolor='k')
|
11
|
-
plt.rc('figure', figsize=figsize, dpi=dpi)
|
12
|
-
|
13
|
-
|
14
|
-
def plot_data(X, y, d=0, auto=False, zoom=1):
|
15
|
-
X = X.cpu()
|
16
|
-
y = y.cpu()
|
17
|
-
plt.scatter(X.numpy()[:, 0], X.numpy()[:, 1], c=y, s=20, cmap=plt.cm.Spectral)
|
18
|
-
plt.axis('square')
|
19
|
-
plt.axis(np.array((-1.1, 1.1, -1.1, 1.1)) * zoom)
|
20
|
-
if auto is True: plt.axis('equal')
|
21
|
-
plt.axis('off')
|
22
|
-
|
23
|
-
_m, _c = 0, '.15'
|
24
|
-
plt.axvline(0, ymin=_m, color=_c, lw=1, zorder=0)
|
25
|
-
plt.axhline(0, xmin=_m, color=_c, lw=1, zorder=0)
|
26
|
-
|
27
|
-
|
28
|
-
def plot_model(X, y, model):
|
29
|
-
model.cpu()
|
30
|
-
mesh = np.arange(-1.1, 1.1, 0.01)
|
31
|
-
xx, yy = np.meshgrid(mesh, mesh)
|
32
|
-
with torch.no_grad():
|
33
|
-
data = torch.from_numpy(np.vstack((xx.reshape(-1), yy.reshape(-1))).T).float()
|
34
|
-
Z = model(data).detach()
|
35
|
-
Z = np.argmax(Z, axis=1).reshape(xx.shape)
|
36
|
-
plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral, alpha=0.3)
|
37
|
-
plot_data(X, y)
|
38
|
-
|
39
|
-
|
40
|
-
def show_scatterplot(X, colors, title=''):
|
41
|
-
colors = colors.cpu().numpy()
|
42
|
-
X = X.cpu().numpy()
|
43
|
-
plt.figure()
|
44
|
-
plt.axis('equal')
|
45
|
-
plt.scatter(X[:, 0], X[:, 1], c=colors, s=30)
|
46
|
-
# plt.grid(True)
|
47
|
-
plt.title(title)
|
48
|
-
plt.axis('off')
|
49
|
-
|
50
|
-
|
51
|
-
def plot_bases(bases, width=0.04):
|
52
|
-
bases = bases.cpu()
|
53
|
-
bases[2:] -= bases[:2]
|
54
|
-
plt.arrow(*bases[0], *bases[2], width=width, color=(1,0,0), zorder=10, alpha=1., length_includes_head=True)
|
55
|
-
plt.arrow(*bases[1], *bases[3], width=width, color=(0,1,0), zorder=10, alpha=1., length_includes_head=True)
|
56
|
-
|
57
|
-
|
58
|
-
def show_mat(mat, vect, prod, threshold=-1):
|
59
|
-
# Subplot grid definition
|
60
|
-
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, sharex=False, sharey=True,
|
61
|
-
gridspec_kw={'width_ratios':[5,1,1]})
|
62
|
-
# Plot matrices
|
63
|
-
cax1 = ax1.matshow(mat.numpy(), clim=(-1, 1))
|
64
|
-
ax2.matshow(vect.numpy(), clim=(-1, 1))
|
65
|
-
cax3 = ax3.matshow(prod.numpy(), clim=(threshold, 1))
|
66
|
-
|
67
|
-
# Set titles
|
68
|
-
ax1.set_title(f'A: {mat.size(0)} \u00D7 {mat.size(1)}')
|
69
|
-
ax2.set_title(f'a^(i): {vect.numel()}')
|
70
|
-
ax3.set_title(f'p: {prod.numel()}')
|
71
|
-
|
72
|
-
# Remove xticks for vectors
|
73
|
-
ax2.set_xticks(tuple())
|
74
|
-
ax3.set_xticks(tuple())
|
75
|
-
|
76
|
-
# Plot colourbars
|
77
|
-
fig.colorbar(cax1, ax=ax2)
|
78
|
-
fig.colorbar(cax3, ax=ax3)
|
79
|
-
|
80
|
-
# Fix y-axis limits
|
81
|
-
ax1.set_ylim(bottom=max(len(prod), len(vect)) - 0.5)
|
82
|
-
|
83
|
-
|
84
|
-
colors = dict(
|
85
|
-
aqua='#8dd3c7',
|
86
|
-
yellow='#ffffb3',
|
87
|
-
lavender='#bebada',
|
88
|
-
red='#fb8072',
|
89
|
-
blue='#80b1d3',
|
90
|
-
orange='#fdb462',
|
91
|
-
green='#b3de69',
|
92
|
-
pink='#fccde5',
|
93
|
-
grey='#d9d9d9',
|
94
|
-
violet='#bc80bd',
|
95
|
-
unk1='#ccebc5',
|
96
|
-
unk2='#ffed6f',
|
97
|
-
)
|
98
|
-
|
99
|
-
|
100
|
-
def _cstr(s, color='black'):
|
101
|
-
if s == ' ':
|
102
|
-
return f'<text style=color:#000;padding-left:10px;background-color:{color}> </text>'
|
103
|
-
else:
|
104
|
-
return f'<text style=color:#000;background-color:{color}>{s} </text>'
|
105
|
-
|
106
|
-
# print html
|
107
|
-
def _print_color(t):
|
108
|
-
display(HTML(''.join([_cstr(ti, color=ci) for ti, ci in t])))
|
109
|
-
|
110
|
-
# get appropriate color for value
|
111
|
-
def _get_clr(value):
|
112
|
-
colors = ('#85c2e1', '#89c4e2', '#95cae5', '#99cce6', '#a1d0e8',
|
113
|
-
'#b2d9ec', '#baddee', '#c2e1f0', '#eff7fb', '#f9e8e8',
|
114
|
-
'#f9e8e8', '#f9d4d4', '#f9bdbd', '#f8a8a8', '#f68f8f',
|
115
|
-
'#f47676', '#f45f5f', '#f34343', '#f33b3b', '#f42e2e')
|
116
|
-
value = int((value * 100) / 5)
|
117
|
-
if value == len(colors): value -= 1 # fixing bugs...
|
118
|
-
return colors[value]
|
119
|
-
|
120
|
-
def _visualise_values(output_values, result_list):
|
121
|
-
text_colours = []
|
122
|
-
for i in range(len(output_values)):
|
123
|
-
text = (result_list[i], _get_clr(output_values[i]))
|
124
|
-
text_colours.append(text)
|
125
|
-
_print_color(text_colours)
|
126
|
-
|
127
|
-
def print_colourbar():
|
128
|
-
color_range = torch.linspace(-2.5, 2.5, 20)
|
129
|
-
to_print = [(f'{x:.2f}', _get_clr((x+2.5)/5)) for x in color_range]
|
130
|
-
_print_color(to_print)
|
131
|
-
|
132
|
-
|
133
|
-
# Let's only focus on the last time step for now
|
134
|
-
# First, the cell state (Long term memory)
|
135
|
-
def plot_state(data, state, b, decoder):
|
136
|
-
actual_data = decoder(data[b, :, :].numpy())
|
137
|
-
seq_len = len(actual_data)
|
138
|
-
seq_len_w_pad = len(state)
|
139
|
-
for s in range(state.size(2)):
|
140
|
-
states = torch.sigmoid(state[:, b, s])
|
141
|
-
_visualise_values(states[seq_len_w_pad - seq_len:], list(actual_data))
|
@@ -1,27 +0,0 @@
|
|
1
|
-
import torch
|
2
|
-
import math
|
3
|
-
def spiral_data(N=1000, D=2, C=3):
|
4
|
-
X = torch.zeros(N * C, D)
|
5
|
-
y = torch.zeros(N * C, dtype=torch.long)
|
6
|
-
for c in range(C):
|
7
|
-
index = 0
|
8
|
-
t = torch.linspace(0, 1, N)
|
9
|
-
# When c = 0 and t = 0: start of linspace
|
10
|
-
# When c = 0 and t = 1: end of linpace
|
11
|
-
# This inner_var is for the formula inside sin() and cos() like sin(inner_var) and cos(inner_Var)
|
12
|
-
inner_var = torch.linspace(
|
13
|
-
# When t = 0
|
14
|
-
(2 * math.pi / C) * (c),
|
15
|
-
# When t = 1
|
16
|
-
(2 * math.pi / C) * (2 + c),
|
17
|
-
N
|
18
|
-
) + torch.randn(N) * 0.2
|
19
|
-
|
20
|
-
for ix in range(N * c, N * (c + 1)):
|
21
|
-
X[ix] = t[index] * torch.FloatTensor((
|
22
|
-
math.sin(inner_var[index]), math.cos(inner_var[index])
|
23
|
-
))
|
24
|
-
y[ix] = c
|
25
|
-
index += 1
|
26
|
-
|
27
|
-
return (X, y)
|
Binary file
|
Binary file
|
@@ -1,70 +0,0 @@
|
|
1
|
-
def group_texts(examples):
|
2
|
-
# Concatenate all texts.
|
3
|
-
concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()}
|
4
|
-
total_length = len(concatenated_examples[list(examples.keys())[0]])
|
5
|
-
# We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
|
6
|
-
# customize this part to your needs.
|
7
|
-
total_length = (total_length // block_size) * block_size
|
8
|
-
# Split by chunks of max_len.
|
9
|
-
result = {
|
10
|
-
k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
|
11
|
-
for k, t in concatenated_examples.items()
|
12
|
-
}
|
13
|
-
result["labels"] = result["input_ids"].copy()
|
14
|
-
return result
|
15
|
-
|
16
|
-
def whole_word_masking_data_collator(features):
|
17
|
-
from transformers import default_data_collator
|
18
|
-
for feature in features:
|
19
|
-
word_ids = feature.pop("word_ids")
|
20
|
-
|
21
|
-
# Create a map between words and corresponding token indices
|
22
|
-
mapping = collections.defaultdict(list)
|
23
|
-
current_word_index = -1
|
24
|
-
current_word = None
|
25
|
-
for idx, word_id in enumerate(word_ids):
|
26
|
-
if word_id is not None:
|
27
|
-
if word_id != current_word:
|
28
|
-
current_word = word_id
|
29
|
-
current_word_index += 1
|
30
|
-
mapping[current_word_index].append(idx)
|
31
|
-
|
32
|
-
# Randomly mask words
|
33
|
-
mask = np.random.binomial(1, wwm_probability, (len(mapping),))
|
34
|
-
input_ids = feature["input_ids"]
|
35
|
-
labels = feature["labels"]
|
36
|
-
new_labels = [-100] * len(labels)
|
37
|
-
for word_id in np.where(mask)[0]:
|
38
|
-
word_id = word_id.item()
|
39
|
-
for idx in mapping[word_id]:
|
40
|
-
new_labels[idx] = labels[idx]
|
41
|
-
input_ids[idx] = tokenizer.mask_token_id
|
42
|
-
feature["labels"] = new_labels
|
43
|
-
|
44
|
-
return default_data_collator(features)
|
45
|
-
|
46
|
-
if __name__ == "__main__2":
|
47
|
-
|
48
|
-
from transformers import AutoModelForMaskedLM
|
49
|
-
from transformers import AutoTokenizer
|
50
|
-
import torch
|
51
|
-
|
52
|
-
model_checkpoint = "distilbert-base-uncased"
|
53
|
-
model = AutoModelForMaskedLM.from_pretrained(model_checkpoint)
|
54
|
-
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
|
55
|
-
|
56
|
-
text = "This is a great [MASK]."
|
57
|
-
|
58
|
-
inputs = tokenizer(text, return_tensors="pt")
|
59
|
-
token_logits = model(**inputs).logits
|
60
|
-
# Find the location of [MASK] and extract its logits
|
61
|
-
mask_token_index = torch.where(inputs["input_ids"] == tokenizer.mask_token_id)[1]
|
62
|
-
mask_token_logits = token_logits[0, mask_token_index, :]
|
63
|
-
# Pick the [MASK] candidates with the highest logits
|
64
|
-
top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
|
65
|
-
|
66
|
-
for token in top_5_tokens:
|
67
|
-
print(f"'>>> {text.replace(tokenizer.mask_token, tokenizer.decode([token]))}'")
|
68
|
-
|
69
|
-
|
70
|
-
|
File without changes
|