scout-ai 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.vimproject +87 -15
- data/README.md +296 -0
- data/Rakefile +2 -0
- data/VERSION +1 -1
- data/doc/Agent.md +279 -0
- data/doc/Chat.md +258 -0
- data/doc/LLM.md +446 -0
- data/doc/Model.md +513 -0
- data/doc/RAG.md +129 -0
- data/lib/scout/llm/agent/chat.rb +48 -1
- data/lib/scout/llm/agent/delegate.rb +51 -0
- data/lib/scout/llm/agent/iterate.rb +44 -0
- data/lib/scout/llm/agent.rb +43 -22
- data/lib/scout/llm/ask.rb +47 -7
- data/lib/scout/llm/backends/anthropic.rb +147 -0
- data/lib/scout/llm/backends/bedrock.rb +1 -1
- data/lib/scout/llm/backends/ollama.rb +27 -30
- data/lib/scout/llm/backends/openai.rb +36 -41
- data/lib/scout/llm/backends/responses.rb +166 -113
- data/lib/scout/llm/chat.rb +270 -102
- data/lib/scout/llm/embed.rb +4 -4
- data/lib/scout/llm/mcp.rb +28 -0
- data/lib/scout/llm/parse.rb +1 -0
- data/lib/scout/llm/rag.rb +9 -0
- data/lib/scout/llm/tools/call.rb +76 -0
- data/lib/scout/llm/tools/knowledge_base.rb +159 -0
- data/lib/scout/llm/tools/mcp.rb +59 -0
- data/lib/scout/llm/tools/workflow.rb +106 -0
- data/lib/scout/llm/tools.rb +98 -141
- data/lib/scout-ai.rb +1 -0
- data/scout-ai.gemspec +31 -18
- data/scout_commands/agent/ask +59 -78
- data/scout_commands/documenter +148 -0
- data/scout_commands/llm/ask +3 -2
- data/scout_commands/llm/server +319 -0
- data/share/server/chat.html +138 -0
- data/share/server/chat.js +468 -0
- data/test/scout/llm/backends/test_anthropic.rb +134 -0
- data/test/scout/llm/backends/test_ollama.rb +1 -1
- data/test/scout/llm/backends/test_openai.rb +45 -6
- data/test/scout/llm/backends/test_responses.rb +124 -0
- data/test/scout/llm/test_agent.rb +1 -93
- data/test/scout/llm/test_ask.rb +3 -1
- data/test/scout/llm/test_chat.rb +43 -1
- data/test/scout/llm/test_mcp.rb +29 -0
- data/test/scout/llm/tools/test_knowledge_base.rb +22 -0
- data/test/scout/llm/tools/test_mcp.rb +11 -0
- data/test/scout/llm/tools/test_workflow.rb +39 -0
- metadata +56 -17
- data/README.rdoc +0 -18
- data/python/scout_ai/__pycache__/__init__.cpython-310.pyc +0 -0
- data/python/scout_ai/__pycache__/__init__.cpython-311.pyc +0 -0
- data/python/scout_ai/__pycache__/huggingface.cpython-310.pyc +0 -0
- data/python/scout_ai/__pycache__/huggingface.cpython-311.pyc +0 -0
- data/python/scout_ai/__pycache__/util.cpython-310.pyc +0 -0
- data/python/scout_ai/__pycache__/util.cpython-311.pyc +0 -0
- data/python/scout_ai/atcold/plot_lib.py +0 -141
- data/python/scout_ai/atcold/spiral.py +0 -27
- data/python/scout_ai/huggingface/train/__pycache__/__init__.cpython-310.pyc +0 -0
- data/python/scout_ai/huggingface/train/__pycache__/next_token.cpython-310.pyc +0 -0
- data/python/scout_ai/language_model.py +0 -70
- /data/{python/scout_ai/atcold/__init__.py → test/scout/llm/tools/test_call.rb} +0 -0
data/README.rdoc DELETED
@@ -1,18 +0,0 @@
-= scout-ai
-
-Description goes here.
-
-== Contributing to scout-ai
-
-* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
-* Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it.
-* Fork the project.
-* Start a feature/bugfix branch.
-* Commit and push until you are happy with your contribution.
-* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
-* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
-
-== Copyright
-
-Copyright (c) 2025 Miguel Vazquez. See LICENSE.txt for
-further details.
data/python/scout_ai/__pycache__/__init__.cpython-310.pyc DELETED (binary file)
data/python/scout_ai/__pycache__/__init__.cpython-311.pyc DELETED (binary file)
data/python/scout_ai/__pycache__/huggingface.cpython-310.pyc DELETED (binary file)
data/python/scout_ai/__pycache__/huggingface.cpython-311.pyc DELETED (binary file)
data/python/scout_ai/__pycache__/util.cpython-310.pyc DELETED (binary file)
data/python/scout_ai/__pycache__/util.cpython-311.pyc DELETED (binary file)
data/python/scout_ai/atcold/plot_lib.py DELETED
@@ -1,141 +0,0 @@
-from matplotlib import pyplot as plt
-import numpy as np
-import torch
-from IPython.display import HTML, display
-
-
-def set_default(figsize=(10, 10), dpi=100):
-    plt.style.use(['dark_background', 'bmh'])
-    plt.rc('axes', facecolor='k')
-    plt.rc('figure', facecolor='k')
-    plt.rc('figure', figsize=figsize, dpi=dpi)
-
-
-def plot_data(X, y, d=0, auto=False, zoom=1):
-    X = X.cpu()
-    y = y.cpu()
-    plt.scatter(X.numpy()[:, 0], X.numpy()[:, 1], c=y, s=20, cmap=plt.cm.Spectral)
-    plt.axis('square')
-    plt.axis(np.array((-1.1, 1.1, -1.1, 1.1)) * zoom)
-    if auto is True: plt.axis('equal')
-    plt.axis('off')
-
-    _m, _c = 0, '.15'
-    plt.axvline(0, ymin=_m, color=_c, lw=1, zorder=0)
-    plt.axhline(0, xmin=_m, color=_c, lw=1, zorder=0)
-
-
-def plot_model(X, y, model):
-    model.cpu()
-    mesh = np.arange(-1.1, 1.1, 0.01)
-    xx, yy = np.meshgrid(mesh, mesh)
-    with torch.no_grad():
-        data = torch.from_numpy(np.vstack((xx.reshape(-1), yy.reshape(-1))).T).float()
-        Z = model(data).detach()
-    Z = np.argmax(Z, axis=1).reshape(xx.shape)
-    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral, alpha=0.3)
-    plot_data(X, y)
-
-
-def show_scatterplot(X, colors, title=''):
-    colors = colors.cpu().numpy()
-    X = X.cpu().numpy()
-    plt.figure()
-    plt.axis('equal')
-    plt.scatter(X[:, 0], X[:, 1], c=colors, s=30)
-    # plt.grid(True)
-    plt.title(title)
-    plt.axis('off')
-
-
-def plot_bases(bases, width=0.04):
-    bases = bases.cpu()
-    bases[2:] -= bases[:2]
-    plt.arrow(*bases[0], *bases[2], width=width, color=(1,0,0), zorder=10, alpha=1., length_includes_head=True)
-    plt.arrow(*bases[1], *bases[3], width=width, color=(0,1,0), zorder=10, alpha=1., length_includes_head=True)
-
-
-def show_mat(mat, vect, prod, threshold=-1):
-    # Subplot grid definition
-    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, sharex=False, sharey=True,
-                                        gridspec_kw={'width_ratios':[5,1,1]})
-    # Plot matrices
-    cax1 = ax1.matshow(mat.numpy(), clim=(-1, 1))
-    ax2.matshow(vect.numpy(), clim=(-1, 1))
-    cax3 = ax3.matshow(prod.numpy(), clim=(threshold, 1))
-
-    # Set titles
-    ax1.set_title(f'A: {mat.size(0)} \u00D7 {mat.size(1)}')
-    ax2.set_title(f'a^(i): {vect.numel()}')
-    ax3.set_title(f'p: {prod.numel()}')
-
-    # Remove xticks for vectors
-    ax2.set_xticks(tuple())
-    ax3.set_xticks(tuple())
-
-    # Plot colourbars
-    fig.colorbar(cax1, ax=ax2)
-    fig.colorbar(cax3, ax=ax3)
-
-    # Fix y-axis limits
-    ax1.set_ylim(bottom=max(len(prod), len(vect)) - 0.5)
-
-
-colors = dict(
-    aqua='#8dd3c7',
-    yellow='#ffffb3',
-    lavender='#bebada',
-    red='#fb8072',
-    blue='#80b1d3',
-    orange='#fdb462',
-    green='#b3de69',
-    pink='#fccde5',
-    grey='#d9d9d9',
-    violet='#bc80bd',
-    unk1='#ccebc5',
-    unk2='#ffed6f',
-)
-
-
-def _cstr(s, color='black'):
-    if s == ' ':
-        return f'<text style=color:#000;padding-left:10px;background-color:{color}> </text>'
-    else:
-        return f'<text style=color:#000;background-color:{color}>{s} </text>'
-
-# print html
-def _print_color(t):
-    display(HTML(''.join([_cstr(ti, color=ci) for ti, ci in t])))
-
-# get appropriate color for value
-def _get_clr(value):
-    colors = ('#85c2e1', '#89c4e2', '#95cae5', '#99cce6', '#a1d0e8',
-              '#b2d9ec', '#baddee', '#c2e1f0', '#eff7fb', '#f9e8e8',
-              '#f9e8e8', '#f9d4d4', '#f9bdbd', '#f8a8a8', '#f68f8f',
-              '#f47676', '#f45f5f', '#f34343', '#f33b3b', '#f42e2e')
-    value = int((value * 100) / 5)
-    if value == len(colors): value -= 1 # fixing bugs...
-    return colors[value]
-
-def _visualise_values(output_values, result_list):
-    text_colours = []
-    for i in range(len(output_values)):
-        text = (result_list[i], _get_clr(output_values[i]))
-        text_colours.append(text)
-    _print_color(text_colours)
-
-def print_colourbar():
-    color_range = torch.linspace(-2.5, 2.5, 20)
-    to_print = [(f'{x:.2f}', _get_clr((x+2.5)/5)) for x in color_range]
-    _print_color(to_print)
-
-
-# Let's only focus on the last time step for now
-# First, the cell state (Long term memory)
-def plot_state(data, state, b, decoder):
-    actual_data = decoder(data[b, :, :].numpy())
-    seq_len = len(actual_data)
-    seq_len_w_pad = len(state)
-    for s in range(state.size(2)):
-        states = torch.sigmoid(state[:, b, s])
-        _visualise_values(states[seq_len_w_pad - seq_len:], list(actual_data))
data/python/scout_ai/atcold/spiral.py DELETED
@@ -1,27 +0,0 @@
-import torch
-import math
-def spiral_data(N=1000, D=2, C=3):
-    X = torch.zeros(N * C, D)
-    y = torch.zeros(N * C, dtype=torch.long)
-    for c in range(C):
-        index = 0
-        t = torch.linspace(0, 1, N)
-        # When c = 0 and t = 0: start of linspace
-        # When c = 0 and t = 1: end of linpace
-        # This inner_var is for the formula inside sin() and cos() like sin(inner_var) and cos(inner_Var)
-        inner_var = torch.linspace(
-            # When t = 0
-            (2 * math.pi / C) * (c),
-            # When t = 1
-            (2 * math.pi / C) * (2 + c),
-            N
-        ) + torch.randn(N) * 0.2
-
-        for ix in range(N * c, N * (c + 1)):
-            X[ix] = t[index] * torch.FloatTensor((
-                math.sin(inner_var[index]), math.cos(inner_var[index])
-            ))
-            y[ix] = c
-            index += 1
-
-    return (X, y)
data/python/scout_ai/huggingface/train/__pycache__/__init__.cpython-310.pyc DELETED (binary file)
data/python/scout_ai/huggingface/train/__pycache__/next_token.cpython-310.pyc DELETED (binary file)
data/python/scout_ai/language_model.py DELETED
@@ -1,70 +0,0 @@
-def group_texts(examples):
-    # Concatenate all texts.
-    concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()}
-    total_length = len(concatenated_examples[list(examples.keys())[0]])
-    # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
-    # customize this part to your needs.
-    total_length = (total_length // block_size) * block_size
-    # Split by chunks of max_len.
-    result = {
-        k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
-        for k, t in concatenated_examples.items()
-    }
-    result["labels"] = result["input_ids"].copy()
-    return result
-
-def whole_word_masking_data_collator(features):
-    from transformers import default_data_collator
-    for feature in features:
-        word_ids = feature.pop("word_ids")
-
-        # Create a map between words and corresponding token indices
-        mapping = collections.defaultdict(list)
-        current_word_index = -1
-        current_word = None
-        for idx, word_id in enumerate(word_ids):
-            if word_id is not None:
-                if word_id != current_word:
-                    current_word = word_id
-                    current_word_index += 1
-                mapping[current_word_index].append(idx)
-
-        # Randomly mask words
-        mask = np.random.binomial(1, wwm_probability, (len(mapping),))
-        input_ids = feature["input_ids"]
-        labels = feature["labels"]
-        new_labels = [-100] * len(labels)
-        for word_id in np.where(mask)[0]:
-            word_id = word_id.item()
-            for idx in mapping[word_id]:
-                new_labels[idx] = labels[idx]
-                input_ids[idx] = tokenizer.mask_token_id
-        feature["labels"] = new_labels
-
-    return default_data_collator(features)
-
-if __name__ == "__main__2":
-
-    from transformers import AutoModelForMaskedLM
-    from transformers import AutoTokenizer
-    import torch
-
-    model_checkpoint = "distilbert-base-uncased"
-    model = AutoModelForMaskedLM.from_pretrained(model_checkpoint)
-    tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
-
-    text = "This is a great [MASK]."
-
-    inputs = tokenizer(text, return_tensors="pt")
-    token_logits = model(**inputs).logits
-    # Find the location of [MASK] and extract its logits
-    mask_token_index = torch.where(inputs["input_ids"] == tokenizer.mask_token_id)[1]
-    mask_token_logits = token_logits[0, mask_token_index, :]
-    # Pick the [MASK] candidates with the highest logits
-    top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
-
-    for token in top_5_tokens:
-        print(f"'>>> {text.replace(tokenizer.mask_token, tokenizer.decode([token]))}'")
-
-
-
data/{python/scout_ai/atcold/__init__.py → test/scout/llm/tools/test_call.rb} RENAMED
File without changes