learn_bash_from_session_data 1.0.10 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/scripts/enrichment_builtins.py +1266 -0
- package/scripts/enrichment_coreutils.py +1499 -0
- package/scripts/enrichment_netproc.py +2270 -0
- package/scripts/enrichment_netsys.py +1601 -0
- package/scripts/enrichment_pkgcomp.py +2185 -0
- package/scripts/enrichment_textdev.py +2016 -0
- package/scripts/html_generator.py +144 -1
- package/scripts/knowledge_base.py +11521 -5648
- package/scripts/merge_enrichment.py +272 -0
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Merge enrichment data into knowledge_base.py COMMAND_DB.
|
|
4
|
+
|
|
5
|
+
Reads enrichment data from enrichment_*.py files and merges them into
|
|
6
|
+
the existing COMMAND_DB entries in knowledge_base.py. Adds missing fields
|
|
7
|
+
(use_cases, gotchas, man_url, related, difficulty) and supplements
|
|
8
|
+
existing flag definitions with extra_flags.
|
|
9
|
+
|
|
10
|
+
Usage:
|
|
11
|
+
python scripts/merge_enrichment.py [--dry-run]
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import sys
|
|
15
|
+
import re
|
|
16
|
+
import importlib
|
|
17
|
+
import importlib.util
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Dict, Any
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def load_enrichment_module(filepath: Path) -> Dict[str, Any]:
    """Load the ENRICHMENT_DATA mapping defined by a Python source file.

    The file is executed as a throwaway module named "enrichment", so any
    top-level code it contains runs as a side effect of loading.

    Args:
        filepath: Path of the Python file to execute.

    Returns:
        The module's ``ENRICHMENT_DATA`` attribute, or an empty dict when the
        module does not define one.

    Raises:
        ImportError: If no import spec/loader can be created for ``filepath``
            (for example, the file does not have a Python source suffix).
    """
    spec = importlib.util.spec_from_file_location("enrichment", filepath)
    # spec_from_file_location returns None when it cannot pick a loader;
    # fail with a clear message instead of an AttributeError on None.
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load enrichment module from {filepath}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return getattr(module, 'ENRICHMENT_DATA', {})
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def collect_all_enrichments(scripts_dir: Path) -> Dict[str, Any]:
    """Gather enrichment data from every enrichment_*.py file in a directory.

    Files are visited in sorted path order. The first file that mentions a
    command supplies its entry; later files may only fill in keys that the
    entry is missing or holds with a falsy value.

    Args:
        scripts_dir: Directory searched (non-recursively) for enrichment files.

    Returns:
        Mapping of command name to its combined enrichment dict.
    """
    combined = {}
    for path in sorted(scripts_dir.glob("enrichment_*.py")):
        print(f" Loading: {path.name}")
        loaded = load_enrichment_module(path)
        print(f" -> {len(loaded)} commands")
        for name, fields in loaded.items():
            if name not in combined:
                combined[name] = fields
                continue
            # Supplement only: never replace a value that is already set.
            target = combined[name]
            for key, value in fields.items():
                if not target.get(key):
                    target[key] = value
    return combined
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _find_matching_brace(text: str, open_idx: int) -> int:
    """Return the index of the '}' that closes the '{' at open_idx, or -1.

    Braces inside single- or double-quoted strings and inside '#' comments
    are ignored. Triple-quoted strings are not treated specially.
    """
    depth = 0
    i = open_idx
    end = len(text)
    while i < end:
        ch = text[i]
        if ch == '{':
            depth += 1
        elif ch == '}':
            depth -= 1
            if depth == 0:
                return i
        elif ch == '"' or ch == "'":
            # Skip to the matching quote, stepping over backslash escapes.
            i += 1
            while i < end and text[i] != ch:
                if text[i] == '\\':
                    i += 1
                i += 1
        elif ch == '#':
            # A comment may contain quotes or braces; skip to end of line.
            newline = text.find('\n', i)
            if newline == -1:
                return -1
            i = newline
        i += 1
    return -1


def _py_string(value: Any) -> str:
    """Render value as a double-quoted Python string literal.

    JSON string escapes (quote, backslash, control characters) are also valid
    Python escapes, so json.dumps yields a literal that evaluates back to the
    original text.
    """
    import json  # local import: keeps this block independent of file-level imports

    return json.dumps(str(value), ensure_ascii=False)


def _insert_before_brace(text: str, brace_idx: int, block: str) -> str:
    """Insert block immediately before text[brace_idx].

    If the last item before the brace has no trailing comma, one is added so
    the inserted items do not run into it.
    """
    head = text[:brace_idx]
    last = len(head.rstrip())
    if last and head[last - 1] not in ',{':
        head = head[:last] + ',' + head[last:]
    return head + block + text[brace_idx:]


def _format_field(field: str, value: Any) -> list:
    """Return the source lines for one new field, or [] for unsupported values.

    Only strings and lists of strings are emitted; short lists go on a single
    line, longer ones get one item per line.
    """
    if isinstance(value, str):
        return [f' "{field}": {_py_string(value)},']
    if isinstance(value, list) and all(isinstance(v, str) for v in value):
        literals = [_py_string(v) for v in value]
        inline = ', '.join(literals)
        if len(inline) < 80:
            return [f' "{field}": [{inline}],']
        return [f' "{field}": [', *(f' {lit},' for lit in literals), ' ],']
    return []


def merge_into_knowledge_base(kb_path: Path, enrichments: Dict[str, Any], dry_run: bool = False) -> int:
    """
    Merge enrichment data into knowledge_base.py by editing its source text.

    For each command in ``enrichments`` the first ``"name": {`` entry in the
    file is located and its closing brace found by brace matching. Within that
    entry:

    * ``extra_flags`` whose key is not already present are appended to the
      existing ``"flags"`` dict (nothing happens if the entry has no flags);
    * ``improved_description`` replaces the existing ``"description"`` string;
    * ``man_url``, ``use_cases``, ``gotchas``, ``related`` and ``difficulty``
      are appended before the closing brace when the entry lacks them and the
      enrichment supplies a non-empty string or list of strings.

    All emitted strings are fully escaped, and a separating comma is added
    when the preceding item lacks one.

    NOTE(review): leading whitespace inside the emitted literals and the
    entry pattern is reproduced as seen in the reviewed copy — confirm it
    matches the layout of knowledge_base.py.

    Args:
        kb_path: Path to the knowledge base source file.
        enrichments: Mapping of command name to enrichment dict.
        dry_run: When true, report what would change without writing.

    Returns:
        Number of command entries whose text was changed (by new fields, new
        flags, or a replaced description).
    """
    content = kb_path.read_text(encoding='utf-8')
    original_content = content
    enriched_count = 0
    fields_to_add = ['man_url', 'use_cases', 'gotchas', 'related', 'difficulty']

    for cmd_name, enrichment in enrichments.items():
        # re.escape handles special command names such as ".".
        escaped_name = re.escape(cmd_name)
        entry_pattern = rf' "{escaped_name}": \{{'
        match = re.search(entry_pattern, content)
        if not match:
            print(f" WARNING: Command '{cmd_name}' not found in COMMAND_DB, skipping")
            continue

        entry_start = match.start()
        entry_end = _find_matching_brace(content, match.end() - 1)
        if entry_end == -1:
            print(f" WARNING: Could not find end of entry for '{cmd_name}', skipping")
            continue

        # Work on the entry in isolation and splice it back once, so no
        # offsets have to be recomputed after each modification.
        entry = content[entry_start:entry_end + 1]
        updated = entry

        # extra_flags: add flags that the entry does not mention yet.
        extra_flags = enrichment.get('extra_flags', {})
        if extra_flags and '"flags"' in entry:
            flag_lines = []
            for flag, desc in extra_flags.items():
                flag_literal = _py_string(flag)
                if flag_literal not in entry:
                    flag_lines.append(f' {flag_literal}: {_py_string(desc)},')
            if flag_lines:
                flags_match = re.search(r'"flags":\s*\{', updated)
                if flags_match:
                    flags_close = _find_matching_brace(updated, flags_match.end() - 1)
                    if flags_close != -1:
                        flags_insert = '\n' + '\n'.join(flag_lines) + '\n '
                        updated = _insert_before_brace(updated, flags_close, flags_insert)

        # improved_description: replace the existing description literal.
        improved_desc = enrichment.get('improved_description')
        if improved_desc and '"description"' in entry:
            # The string part excludes backslashes from the plain runs so an
            # escaped quote (\") cannot be mistaken for the closing quote.
            desc_pattern = rf'( "{escaped_name}": \{{[^}}]*?"description":\s*)"[^"\\]*(?:\\.[^"\\]*)*"'
            desc_literal = _py_string(improved_desc)
            # A callable replacement avoids re.sub interpreting backslashes
            # in the new description as template escapes.
            updated = re.sub(
                desc_pattern,
                lambda m: m.group(1) + desc_literal,
                updated,
                count=1,
            )

        # New top-level fields: only those absent from the original entry.
        insertion_lines = []
        for field in fields_to_add:
            if f'"{field}"' in entry:
                continue
            value = enrichment.get(field)
            if value:
                insertion_lines.extend(_format_field(field, value))
        if insertion_lines:
            insertion = '\n' + '\n'.join(insertion_lines) + '\n '
            # The entry's closing brace is always the last character.
            updated = _insert_before_brace(updated, len(updated) - 1, insertion)

        if updated != entry:
            content = content[:entry_start] + updated + content[entry_end + 1:]
            enriched_count += 1

    if content != original_content:
        if dry_run:
            print(f"\n DRY RUN: Would enrich {enriched_count} commands")
            # Show a diff summary
            added_lines = len(content.splitlines()) - len(original_content.splitlines())
            print(f" Would add ~{added_lines} lines")
        else:
            kb_path.write_text(content, encoding='utf-8')
            print(f"\n Enriched {enriched_count} commands in {kb_path.name}")

    return enriched_count
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def main():
    """Run the enrichment merge from the command line.

    Collects enrichment data from the directory containing this script,
    merges it into knowledge_base.py in the same directory, and, unless
    ``--dry-run`` was passed, checks that the rewritten file still compiles.
    If it does not, the original file content is restored.

    Returns:
        0 on success; 1 if knowledge_base.py is missing, no enrichment data
        was found, or the merged file failed the syntax check.
    """
    dry_run = '--dry-run' in sys.argv

    scripts_dir = Path(__file__).parent
    kb_path = scripts_dir / 'knowledge_base.py'

    if not kb_path.exists():
        print(f"Error: {kb_path} not found")
        return 1

    print("Collecting enrichment data...")
    enrichments = collect_all_enrichments(scripts_dir)

    if not enrichments:
        print("No enrichment data found. Run the research agents first.")
        return 1

    print(f"\nTotal enrichments: {len(enrichments)} commands")
    print(f"\nMerging into {kb_path.name}{' (DRY RUN)' if dry_run else ''}...")

    # Keep the pre-merge text so a broken merge can actually be rolled back.
    backup = kb_path.read_text(encoding='utf-8')
    merge_into_knowledge_base(kb_path, enrichments, dry_run=dry_run)

    if not dry_run:
        updated = kb_path.read_text(encoding='utf-8')
        # Verify whenever the file changed, whatever count the merge reported.
        if updated != backup:
            print("\nVerifying syntax...")
            try:
                compile(updated, str(kb_path), 'exec')
            except SyntaxError as e:
                print(f" SYNTAX ERROR: {e}")
                print(" Reverting changes...")
                kb_path.write_text(backup, encoding='utf-8')
                return 1
            print(" Syntax OK")

    print("\nDone.")
    return 0
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
# Script entry point: use main()'s return value as the process exit status.
if __name__ == '__main__':
    sys.exit(main())
|