@objctp/opencode-shell-routines 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +114 -0
- package/agents/shell-architect.md +88 -0
- package/agents/shell-expert.md +60 -0
- package/commands/shell-audit.md +47 -0
- package/commands/shell-batch-exec.md +48 -0
- package/commands/shell-new.md +57 -0
- package/commands/shell-routines-setup.md +66 -0
- package/commands/shell-test-run.md +46 -0
- package/opencode.json +19 -0
- package/package.json +34 -0
- package/plugins/shell-hooks.ts +150 -0
- package/scripts/lib-batch.sh +297 -0
- package/scripts/lib-common.sh +332 -0
- package/skills/shell-batch-operations/SKILL.md +97 -0
- package/skills/shell-batch-operations/assets/batch-template.sh +124 -0
- package/skills/shell-batch-operations/examples/data-pipeline.sh +157 -0
- package/skills/shell-batch-operations/examples/file-batch.sh +140 -0
- package/skills/shell-batch-operations/references/decision-tree.md +53 -0
- package/skills/shell-best-practices/SKILL.md +313 -0
- package/skills/shell-best-practices/assets/library.sh +142 -0
- package/skills/shell-best-practices/assets/minimal.sh +54 -0
- package/skills/shell-best-practices/assets/posix.sh +180 -0
- package/skills/shell-best-practices/assets/standard.sh +203 -0
- package/skills/shell-best-practices/references/patterns.md +386 -0
- package/skills/shell-best-practices/references/security.md +195 -0
- package/skills/shell-debugging/SKILL.md +115 -0
- package/skills/shell-debugging/examples/debug-session.md +165 -0
- package/skills/shell-debugging/references/debugging-guide.md +336 -0
- package/skills/shell-profiling/SKILL.md +154 -0
- package/skills/shell-profiling/examples/profile-session.md +225 -0
- package/skills/shell-profiling/references/optimisation-patterns.md +373 -0
- package/skills/shell-profiling/references/profiling-tools.md +318 -0
- package/skills/shell-profiling/scripts/bench.sh +82 -0
- package/skills/shell-profiling/scripts/trace-aggregate.sh +34 -0
- package/skills/shell-review/SKILL.md +61 -0
- package/skills/shell-review/examples/sample-review.md +42 -0
- package/skills/shell-review/references/guidelines.md +48 -0
- package/skills/shell-review/references/review-template.md +56 -0
- package/skills/shell-security/SKILL.md +128 -0
- package/skills/shell-security/examples/dangerous-command-review.md +231 -0
- package/skills/shell-security/examples/secure-script-example.sh +317 -0
- package/skills/shell-security/references/dangerous-commands.md +561 -0
- package/skills/shell-security/references/security-patterns.md +30 -0
- package/skills/shell-security/references/sensitive-files.md +525 -0
- package/skills/shell-security/scripts/security-audit.sh +208 -0
- package/skills/shell-test/SKILL.md +237 -0
- package/skills/shell-test/examples/test-example.md +74 -0
- package/skills/shell-test/references/advanced-patterns.md +52 -0
- package/skills/shell-test/references/assertions.md +184 -0
- package/skills/shell-test/references/test-template.md +60 -0
- package/skills/shell-test/scripts/public-coverage.sh +93 -0
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
# Example: Profiling a Slow Log-Processing Script
|
|
2
|
+
|
|
3
|
+
## Scenario
|
|
4
|
+
|
|
5
|
+
A script that filters log entries by date range, extracts fields, and computes a summary is running slowly on a 500,000-line log file. The script produces correct output but takes over 30 seconds. The goal is to bring it under 5 seconds.
|
|
6
|
+
|
|
7
|
+
## The Script
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
#!/usr/bin/env bash
|
|
11
|
+
set -euo pipefail
|
|
12
|
+
|
|
13
|
+
# analyse-logs.sh -- Summarise error counts by service
|
|
14
|
+
# Usage: analyse-logs.sh <start-date> <end-date> <logfile>
|
|
15
|
+
# Dates in YYYY-MM-DD format
|
|
16
|
+
|
|
17
|
+
start_date="${1:?Usage: $0 <start-date> <end-date> <logfile>}"
|
|
18
|
+
end_date="${2:?Usage: $0 <start-date> <end-date> <logfile>}"
|
|
19
|
+
logfile="${3:?Usage: $0 <start-date> <end-date> <logfile>}"
|
|
20
|
+
|
|
21
|
+
declare -A service_counts
|
|
22
|
+
total_errors=0
|
|
23
|
+
|
|
24
|
+
while IFS= read -r line; do
|
|
25
|
+
# Extract date from line: "2025-01-15 08:23:41 [ERROR] ..."
|
|
26
|
+
line_date=$(echo "$line" | cut -d' ' -f1)
|
|
27
|
+
|
|
28
|
+
# Check date range
|
|
29
|
+
if [[ "$line_date" < "$start_date" || "$line_date" > "$end_date" ]]; then
|
|
30
|
+
continue
|
|
31
|
+
fi
|
|
32
|
+
|
|
33
|
+
# Extract severity
|
|
34
|
+
severity=$(echo "$line" | grep -o '\[[A-Z]\+\]' | tr -d '[]')
|
|
35
|
+
|
|
36
|
+
if [[ "$severity" != "ERROR" ]]; then
|
|
37
|
+
continue
|
|
38
|
+
fi
|
|
39
|
+
|
|
40
|
+
# Extract service name (field after severity)
|
|
41
|
+
service=$(echo "$line" | cut -d' ' -f4)
|
|
42
|
+
|
|
43
|
+
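# Count. NOTE(review): under `set -e`, `(( x++ ))` returns status 1 when x was 0 or unset, which would abort the script on the first match -- confirm, and guard with `|| true` or use x=$(( x + 1 ))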
|
|
44
|
+
(( service_counts["$service"]++ ))
|
|
45
|
+
(( total_errors++ ))
|
|
46
|
+
done < "$logfile"
|
|
47
|
+
|
|
48
|
+
# Print summary
|
|
49
|
+
echo "Error summary: $start_date to $end_date"
|
|
50
|
+
echo "Total errors: $total_errors"
|
|
51
|
+
echo ""
|
|
52
|
+
for service in "${!service_counts[@]}"; do
|
|
53
|
+
echo " $service: ${service_counts[$service]}"
|
|
54
|
+
done
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Step 1: Baseline Measurement
|
|
58
|
+
|
|
59
|
+
Run with `time` to establish the baseline:
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
$ time bash analyse-logs.sh 2025-01-01 2025-01-31 access.log
|
|
63
|
+
|
|
64
|
+
Error summary: 2025-01-01 to 2025-01-31
|
|
65
|
+
Total errors: 3421
|
|
66
|
+
auth-service: 1204
|
|
67
|
+
payment-gateway: 987
|
|
68
|
+
user-api: 623
|
|
69
|
+
...
|
|
70
|
+
|
|
71
|
+
real 0m32.456s
|
|
72
|
+
user 0m28.123s
|
|
73
|
+
sys 0m4.102s
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
32 seconds. The high user time relative to wall-clock time suggests CPU-bound work, likely from spawning many external processes.
|
|
77
|
+
|
|
78
|
+
## Step 2: Xtrace Profiling with Timestamps
|
|
79
|
+
|
|
80
|
+
Add timestamped tracing to a copy of the script. Insert these lines after `set -euo pipefail`:
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
exec 42>/tmp/analyse-logs.trace.log
|
|
84
|
+
BASH_XTRACEFD=42
|
|
85
|
+
PS4='+ ${EPOCHREALTIME} ${BASH_SOURCE}:${LINENO} ${FUNCNAME[0]:-main} '
|
|
86
|
+
set -x
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Run the script again. The trace log now contains timestamped entries:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
$ head -5 /tmp/analyse-logs.trace.log
|
|
93
|
+
+ 1709654321.001234 analyse-logs.sh:12 main
|
|
94
|
+
+ 1709654321.001456 analyse-logs.sh:19 main + IFS= read -r line
|
|
95
|
+
+ 1709654321.002001 analyse-logs.sh:22 main ++ echo '2025-01-15 08:23:41 [ERROR] auth-service ...'
|
|
96
|
+
+ 1709654321.002345 analyse-logs.sh:22 main ++ cut '-d ' -f1
|
|
97
|
+
+ 1709654321.003100 analyse-logs.sh:22 main + line_date=2025-01-15
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
## Step 3: Identify the Hotspot
|
|
101
|
+
|
|
102
|
+
Post-process the trace to find the slowest lines:
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
scripts/trace-aggregate.sh /tmp/analyse-logs.trace.log 10
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
Output:
|
|
109
|
+
|
|
110
|
+
```
|
|
111
|
+
12.3450 s analyse-logs.sh:22 (called 500000 times)
|
|
112
|
+
8.7650 s analyse-logs.sh:25 (called 350000 times)
|
|
113
|
+
5.4320 s analyse-logs.sh:29 (called 142000 times)
|
|
114
|
+
3.2100 s analyse-logs.sh:33 (called 3421 times)
|
|
115
|
+
...
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
Line 22 (`echo "$line" | cut -d' ' -f1`) is called 500,000 times and accounts for 12 seconds. Line 25 (`grep -o`) accounts for another 8.7 seconds. Together, these subshell invocations in the tight loop dominate execution time.
|
|
119
|
+
|
|
120
|
+
> In this case, xtrace profiling was sufficient to identify the hotspot. For I/O-bound problems where the bottleneck is file operations or network calls, the deep-dive step (Step 5 in the workflow: `strace -c`, `/usr/bin/time -v`) would be the next step to quantify syscall overhead.
|
|
121
|
+
|
|
122
|
+
## Step 4: Apply Optimisation
|
|
123
|
+
|
|
124
|
+
Replace the external commands with parameter expansion and bash builtins. Here is the optimised loop body:
|
|
125
|
+
|
|
126
|
+
**Before (lines 19--38):**
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
while IFS= read -r line; do
|
|
130
|
+
line_date=$(echo "$line" | cut -d' ' -f1)
|
|
131
|
+
|
|
132
|
+
if [[ "$line_date" < "$start_date" || "$line_date" > "$end_date" ]]; then
|
|
133
|
+
continue
|
|
134
|
+
fi
|
|
135
|
+
|
|
136
|
+
severity=$(echo "$line" | grep -o '\[[A-Z]\+\]' | tr -d '[]')
|
|
137
|
+
|
|
138
|
+
if [[ "$severity" != "ERROR" ]]; then
|
|
139
|
+
continue
|
|
140
|
+
fi
|
|
141
|
+
|
|
142
|
+
service=$(echo "$line" | cut -d' ' -f4)
|
|
143
|
+
(( service_counts["$service"]++ ))
|
|
144
|
+
(( total_errors++ ))
|
|
145
|
+
done < "$logfile"
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
**After:**
|
|
149
|
+
|
|
150
|
+
```bash
|
|
151
|
+
while IFS=' ' read -r line_date _time severity_raw service_rest; do
|
|
152
|
+
# Filter date range
|
|
153
|
+
if [[ "$line_date" < "$start_date" || "$line_date" > "$end_date" ]]; then
|
|
154
|
+
continue
|
|
155
|
+
fi
|
|
156
|
+
|
|
157
|
+
# Check severity -- severity_raw looks like "[ERROR]"
|
|
158
|
+
if [[ "$severity_raw" != "[ERROR]" ]]; then
|
|
159
|
+
continue
|
|
160
|
+
fi
|
|
161
|
+
|
|
162
|
+
# Extract service name (first word of remaining fields)
|
|
163
|
+
read -r service _ <<< "$service_rest"
|
|
164
|
+
(( service_counts["$service"]++ ))
|
|
165
|
+
(( total_errors++ ))
|
|
166
|
+
done < "$logfile"
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
**What changed:**
|
|
170
|
+
|
|
171
|
+
1. `IFS=' ' read -r line_date _time severity_raw service_rest` splits the line into fields in a single `read` call, replacing the two `$(echo ... | cut)` command substitutions and the `$(echo ... | grep | tr)` pipeline -- no child processes are spawned per line any more.
|
|
172
|
+
2. The severity check compares directly against `[ERROR]` instead of extracting and stripping brackets.
|
|
173
|
+
3. The service extraction uses a second `read` with here-string instead of `cut`.
|
|
174
|
+
|
|
175
|
+
## Step 5: Benchmark Before/After
|
|
176
|
+
|
|
177
|
+
Using hyperfine:
|
|
178
|
+
|
|
179
|
+
```bash
|
|
180
|
+
$ hyperfine --warmup 2 --runs 5 \
|
|
181
|
+
'bash analyse-logs-before.sh 2025-01-01 2025-01-31 access.log' \
|
|
182
|
+
'bash analyse-logs-after.sh 2025-01-01 2025-01-31 access.log'
|
|
183
|
+
|
|
184
|
+
Benchmark 1: bash analyse-logs-before.sh ...
|
|
185
|
+
Time (mean +/- sd): 32.118 s +/- 0.891 s
|
|
186
|
+
|
|
187
|
+
Benchmark 2: bash analyse-logs-after.sh ...
|
|
188
|
+
Time (mean +/- sd): 3.452 s +/- 0.134 s
|
|
189
|
+
|
|
190
|
+
Summary
|
|
191
|
+
bash analyse-logs-after.sh ran 9.30 +/- 0.48 times faster than bash analyse-logs-before.sh
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
## Step 6: Summary
|
|
195
|
+
|
|
196
|
+
| Metric | Before | After | Improvement |
|
|
197
|
+
| --------------- | --------- | -------- | ------------------ |
|
|
198
|
+
| Wall-clock time | 32.1 s | 3.5 s | 9.3x faster |
|
|
199
|
+
| Subshells/line | 4 | 0 | eliminated |
|
|
200
|
+
| External cmds   | cut, grep, tr | none (`read` builtin) | builtin only |
|
|
201
|
+
|
|
202
|
+
**Pattern applied:** Subshell elimination -- replaced `$(echo ... | cmd)` pipelines with `read` field splitting and parameter expansion.
|
|
203
|
+
|
|
204
|
+
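**Trade-off:** The optimised version requires that log lines follow the expected space-delimited format. If the format changes (e.g., quoted fields containing spaces), the `read`-based splitting would need adjustment. This is acceptable because the original `cut -d' '` approach had the same limitation. One behavioural difference remains: `read` collapses runs of consecutive spaces into a single delimiter, whereas `cut -d' '` treats every space as a separate field boundary, so the two versions only agree on lines with single-space separators.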
|
|
205
|
+
|
|
206
|
+
## Further Optimisation (Optional)
|
|
207
|
+
|
|
208
|
+
For even faster performance on very large files, replace the bash loop entirely with a single `awk` pass:
|
|
209
|
+
|
|
210
|
+
```bash
|
|
211
|
+
awk -v start="$start_date" -v end="$end_date" '
|
|
212
|
+
$1 >= start && $1 <= end && $3 == "[ERROR]" {
|
|
213
|
+
count[$4]++
|
|
214
|
+
total++
|
|
215
|
+
}
|
|
216
|
+
END {
|
|
217
|
+
printf "Error summary: %s to %s\n", start, end
|
|
218
|
+
printf "Total errors: %d\n\n", total
|
|
219
|
+
for (svc in count)
|
|
220
|
+
printf " %s: %d\n", svc, count[svc]
|
|
221
|
+
}
|
|
222
|
+
' "$logfile"
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
This processes the file in C without returning to the shell between lines. Typical result: under 0.5 seconds for 500,000 lines. The trade-off is moving logic out of bash into awk, which reduces readability for team members unfamiliar with awk.
|
|
@@ -0,0 +1,373 @@
|
|
|
1
|
+
# Shell Optimisation Patterns
|
|
2
|
+
|
|
3
|
+
Each pattern shows a "before" (slow) and "after" (fast) version with an explanation.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Subshell Elimination
|
|
8
|
+
|
|
9
|
+
Every `$(...)` or backtick invocation forks a child process. In a loop of 10,000 iterations, that is 10,000 extra processes.
|
|
10
|
+
|
|
11
|
+
### Replace external commands with parameter expansion
|
|
12
|
+
|
|
13
|
+
**Before:**
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
for path in "${paths[@]}"; do
|
|
17
|
+
base=$(basename "$path")
|
|
18
|
+
dir=$(dirname "$path")
|
|
19
|
+
ext="${path##*.}"
|
|
20
|
+
echo "$dir/$base has extension $ext"
|
|
21
|
+
done
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
**After:**
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
for path in "${paths[@]}"; do
|
|
28
|
+
base="${path##*/}"
|
|
29
|
+
dir="${path%/*}"
|
|
30
|
+
ext="${path##*.}"
|
|
31
|
+
echo "$dir/$base has extension $ext"
|
|
32
|
+
done
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
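**Why:** `basename` and `dirname` each fork a process. Parameter expansions such as `${path##*/}` and `${path%/*}` are evaluated by the shell itself -- no fork, no exec, no I/O. They are not exact equivalents in edge cases: for a path containing no `/`, `${path%/*}` yields the path unchanged where `dirname` prints `.`, and trailing slashes are not stripped the way `basename` strips them.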
|
|
36
|
+
|
|
37
|
+
### Use `read` with here-string instead of piping to commands
|
|
38
|
+
|
|
39
|
+
**Before:**
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
echo "$line" | cut -d: -f2
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
**After:**
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
IFS=: read -r _ field2 _ <<< "$line"
|
|
49
|
+
echo "$field2"
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
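**Why:** The pipe (`|`) runs each side in its own subshell, and `cut` is an external process. The here-string (`<<<`) is a redirection handled by the shell itself, so the `read` builtin runs in the current shell without a fork.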
|
|
53
|
+
|
|
54
|
+
### Process substitution vs pipe into loops
|
|
55
|
+
|
|
56
|
+
**Before:**
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
total=0
|
|
60
|
+
grep 'pattern' data.txt | while IFS= read -r line; do
|
|
61
|
+
(( total += line ))
|
|
62
|
+
done
|
|
63
|
+
echo "$total" # 0 -- subshell lost the variable
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
**After:**
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
total=0
|
|
70
|
+
while IFS= read -r line; do
|
|
71
|
+
(( total += line ))
|
|
72
|
+
done < <(grep 'pattern' data.txt)
|
|
73
|
+
echo "$total" # correct value
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
**Why:** The pipe runs `while` in a subshell, losing variable changes. Process substitution `<(...)` runs `grep` in a subshell instead, keeping the `while` loop in the main shell.
|
|
77
|
+
|
|
78
|
+
---
|
|
79
|
+
|
|
80
|
+
## Builtin vs External Commands
|
|
81
|
+
|
|
82
|
+
### `[[ ]]` vs `[ ]` vs `test`
|
|
83
|
+
|
|
84
|
+
**Before:**
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
if [ "$status" = "ok" ] && [ "$count" -gt 0 ]; then
|
|
88
|
+
echo "valid"
|
|
89
|
+
fi
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
**After:**
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
if [[ "$status" == "ok" && "$count" -gt 0 ]]; then
|
|
96
|
+
echo "valid"
|
|
97
|
+
fi
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
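**Why:** `[[ ]]` is a bash shell keyword that supports `&&` and `||` inside the expression, avoiding multiple `[ ]` invocations and the associated parsing overhead. Because it is parsed as syntax rather than as an ordinary command, variables inside it undergo no word splitting or pathname expansion, so empty values and values containing spaces are handled correctly even without quoting.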
|
|
101
|
+
|
|
102
|
+
### bash string manipulation vs sed/awk
|
|
103
|
+
|
|
104
|
+
**Before:**
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
lower=$(echo "$name" | tr 'A-Z' 'a-z')
|
|
108
|
+
no_spaces=$(echo "$lower" | sed 's/ /_/g')
|
|
109
|
+
clean=$(echo "$no_spaces" | sed 's/[^a-z0-9_]//g')
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
**After:**
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
lower="${name,,}"
|
|
116
|
+
no_spaces="${lower// /_}"
|
|
117
|
+
clean="${no_spaces//[^a-z0-9_]/}"
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
**Why:** Each pipe to `tr` or `sed` forks a process and opens a pipe. The bash parameter expansions `${var,,}` and `${var//pattern/replacement}` are evaluated by the shell itself and operate on the string in memory (`${var,,}` requires bash 4.0 or later). Three external processes become zero.
|
|
121
|
+
|
|
122
|
+
### `mapfile`/`readarray` vs `while read` loop
|
|
123
|
+
|
|
124
|
+
**Before:**
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
lines=()
|
|
128
|
+
while IFS= read -r line; do
|
|
129
|
+
lines+=("$line")
|
|
130
|
+
done < file.txt
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
**After:**
|
|
134
|
+
|
|
135
|
+
```bash
|
|
136
|
+
mapfile -t lines < file.txt
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
**Why:** `mapfile` reads the entire file into an array in a single builtin operation. The `while read` loop invokes the `read` builtin once per line, with per-iteration overhead.
|
|
140
|
+
|
|
141
|
+
### Arithmetic `(( ))` vs `expr`
|
|
142
|
+
|
|
143
|
+
**Before:**
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
result=$(expr "$a" + "$b")
|
|
147
|
+
remainder=$(expr "$a" % "$b")
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
**After:**
|
|
151
|
+
|
|
152
|
+
```bash
|
|
153
|
+
(( result = a + b ))
|
|
154
|
+
(( remainder = a % b ))
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
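**Why:** `expr` is an external command that forks a process for each arithmetic operation. `(( ))` is evaluated by bash itself. Caveat: `(( expr ))` returns a non-zero exit status when the expression evaluates to 0, so under `set -e` a statement such as `(( result = a + b ))` aborts the script when the result is 0; in that case use `result=$(( a + b ))` or append `|| true`.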
|
|
158
|
+
|
|
159
|
+
---
|
|
160
|
+
|
|
161
|
+
## I/O Optimisation
|
|
162
|
+
|
|
163
|
+
### Redirect once vs per-line
|
|
164
|
+
|
|
165
|
+
**Before:**
|
|
166
|
+
|
|
167
|
+
```bash
|
|
168
|
+
for item in "${items[@]}"; do
|
|
169
|
+
echo "$item" >> output.txt
|
|
170
|
+
done
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
**After:**
|
|
174
|
+
|
|
175
|
+
```bash
|
|
176
|
+
exec 3>output.txt
|
|
177
|
+
for item in "${items[@]}"; do
|
|
178
|
+
echo "$item" >&3
|
|
179
|
+
done
|
|
180
|
+
exec 3>&-
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
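**Why:** Each `>>` opens the file, seeks to the end, writes, and closes. Redirecting via `exec 3>` opens the file once; subsequent `>&3` writes skip the open/close cycle. Note that `3>` truncates `output.txt` when it is opened; to keep the append semantics of the original loop, open the descriptor with `exec 3>>output.txt` instead.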
|
|
184
|
+
|
|
185
|
+
### Avoid useless `cat`
|
|
186
|
+
|
|
187
|
+
**Before:**
|
|
188
|
+
|
|
189
|
+
```bash
|
|
190
|
+
cat file.txt | grep 'pattern'
|
|
191
|
+
cat file.txt | while IFS= read -r line; do ...
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
**After:**
|
|
195
|
+
|
|
196
|
+
```bash
|
|
197
|
+
grep 'pattern' file.txt
|
|
198
|
+
while IFS= read -r line; do ... done < file.txt
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
**Why:** `cat file | cmd` spawns a `cat` process and creates a pipe, both unnecessary when the command can read the file directly via redirection.
|
|
202
|
+
|
|
203
|
+
### Batch reads vs line-by-line
|
|
204
|
+
|
|
205
|
+
**Before:**
|
|
206
|
+
|
|
207
|
+
```bash
|
|
208
|
+
while IFS= read -r line; do
|
|
209
|
+
echo "$line"
|
|
210
|
+
done < large_file.txt > output.txt
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
**After:**
|
|
214
|
+
|
|
215
|
+
```bash
|
|
216
|
+
# If the operation is a simple transformation, use a single tool pass
|
|
217
|
+
sed 's/old/new/g' large_file.txt > output.txt
|
|
218
|
+
|
|
219
|
+
# Or for more complex processing, use awk
|
|
220
|
+
awk '{gsub(/old/, "new"); print}' large_file.txt > output.txt
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
**Why:** Reading line-by-line in bash has per-iteration overhead (buffer management, builtin invocation). A single `sed` or `awk` pass processes the entire file in C without returning to the shell between lines.
|
|
224
|
+
|
|
225
|
+
---
|
|
226
|
+
|
|
227
|
+
## Loop Optimisation
|
|
228
|
+
|
|
229
|
+
### Process substitution instead of piping into loops
|
|
230
|
+
|
|
231
|
+
See **Subshell Elimination** above — `command | while ...` loses variables; use `while ... done < <(command)`.
|
|
232
|
+
|
|
233
|
+
### `shopt -s lastpipe`
|
|
234
|
+
|
|
235
|
+
**Before:**
|
|
236
|
+
|
|
237
|
+
```bash
|
|
238
|
+
# Variables set in the last command of a pipe are lost
|
|
239
|
+
printf '%s\n' "${items[@]}" | sort | while IFS= read -r item; do
|
|
240
|
+
(( count++ ))
|
|
241
|
+
done
|
|
242
|
+
echo "$count" # empty
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
**After:**
|
|
246
|
+
|
|
247
|
+
```bash
|
|
248
|
+
shopt -s lastpipe
|
|
249
|
+
count=0
|
|
250
|
+
printf '%s\n' "${items[@]}" | sort | while IFS= read -r item; do
|
|
251
|
+
(( count++ ))
|
|
252
|
+
done
|
|
253
|
+
echo "$count" # correct value
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
**Why:** `lastpipe` (bash 4.2+) runs the last command of a pipeline in the current shell (not a subshell), preserving variable assignments. Requires job control to be disabled (which it is in scripts by default).
|
|
257
|
+
|
|
258
|
+
### `xargs -P` for parallel work
|
|
259
|
+
|
|
260
|
+
**Before:**
|
|
261
|
+
|
|
262
|
+
```bash
|
|
263
|
+
for url in "${urls[@]}"; do
|
|
264
|
+
curl -s "$url" > /dev/null
|
|
265
|
+
done
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
**After:**
|
|
269
|
+
|
|
270
|
+
```bash
|
|
271
|
+
printf '%s\n' "${urls[@]}" | xargs -P 8 -I{} curl -s "{}" > /dev/null
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
**Why:** The sequential loop processes one URL at a time. `xargs -P 8` runs up to 8 curl processes in parallel, bounded to avoid overwhelming the system.
|
|
275
|
+
|
|
276
|
+
---
|
|
277
|
+
|
|
278
|
+
## String Processing
|
|
279
|
+
|
|
280
|
+
### Parameter expansion vs external tools for simple transformations
|
|
281
|
+
|
|
282
|
+
**Before:**
|
|
283
|
+
|
|
284
|
+
```bash
|
|
285
|
+
trimmed=$(echo "$var" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
**After:**
|
|
289
|
+
|
|
290
|
+
```bash
|
|
291
|
+
# With the default IFS, `read` into a single variable strips leading and trailing whitespace
|
|
292
|
+
read -r trimmed <<< "$var"
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
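**Why:** The `sed` pipeline forks two processes. `read` with here-string is a single builtin. Note that `read` consumes only the first line of `$var` and trims only the whitespace characters present in `IFS`, so this replacement is suitable for single-line values with the default `IFS`.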
|
|
296
|
+
|
|
297
|
+
### Single-pass awk vs multi-tool pipeline
|
|
298
|
+
|
|
299
|
+
**Before:**
|
|
300
|
+
|
|
301
|
+
```bash
|
|
302
|
+
grep 'ERROR' log.txt | cut -d' ' -f3 | sort | uniq -c | sort -rn | head -10
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
**After:**
|
|
306
|
+
|
|
307
|
+
```bash
|
|
308
|
+
awk '/ERROR/ {count[$3]++} END {for (k in count) print count[k], k}' log.txt \
|
|
309
|
+
| sort -rn | head -10
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
**Why:** The before version pipes through 6 processes (grep, cut, sort, uniq, sort, head). The after version does filtering, field extraction, and counting in a single `awk` pass, reducing the pipeline to 3 processes (awk + sort + head).
|
|
313
|
+
|
|
314
|
+
### Replace entire bash loop with a single awk pass
|
|
315
|
+
|
|
316
|
+
For data-heavy scripts, the single highest-impact optimisation is often replacing the entire bash loop with awk.
|
|
317
|
+
|
|
318
|
+
**Before:**
|
|
319
|
+
|
|
320
|
+
```bash
|
|
321
|
+
declare -A counts
|
|
322
|
+
total=0
|
|
323
|
+
while IFS= read -r line; do
|
|
324
|
+
date=$(echo "$line" | cut -d' ' -f1)
|
|
325
|
+
if [[ "$date" < "$start" || "$date" > "$end" ]]; then continue; fi
|
|
326
|
+
severity=$(echo "$line" | grep -o '\[[A-Z]\+\]' | tr -d '[]')
|
|
327
|
+
if [[ "$severity" != "ERROR" ]]; then continue; fi
|
|
328
|
+
service=$(echo "$line" | cut -d' ' -f4)
|
|
329
|
+
(( counts["$service"]++ ))
|
|
330
|
+
(( total++ ))
|
|
331
|
+
done < "$logfile"
|
|
332
|
+
```
|
|
333
|
+
|
|
334
|
+
**After:**
|
|
335
|
+
|
|
336
|
+
```bash
|
|
337
|
+
awk -v start="$start" -v end="$end" '
|
|
338
|
+
$1 >= start && $1 <= end && $3 == "[ERROR]" {
|
|
339
|
+
count[$4]++
|
|
340
|
+
total++
|
|
341
|
+
}
|
|
342
|
+
END {
|
|
343
|
+
printf "Total errors: %d\n", total
|
|
344
|
+
for (svc in count)
|
|
345
|
+
printf " %s: %d\n", svc, count[svc]
|
|
346
|
+
}
|
|
347
|
+
' "$logfile"
|
|
348
|
+
```
|
|
349
|
+
|
|
350
|
+
**Why:** The bash version spawns up to four external processes per line (`cut` twice, `grep`, `tr`; `echo` is a builtin, but each `$(...)` and pipeline stage still forks a subshell) across potentially hundreds of thousands of iterations. The awk version processes the entire file in C without returning to the shell between lines, typically achieving 50-100x speedups on large files. The trade-off is moving logic out of bash into awk, which reduces readability for team members unfamiliar with awk.
|
|
351
|
+
|
|
352
|
+
### `printf %s` concatenation vs repeated echo
|
|
353
|
+
|
|
354
|
+
**Before:**
|
|
355
|
+
|
|
356
|
+
```bash
|
|
357
|
+
echo "Header" > output.txt
|
|
358
|
+
echo "$line1" >> output.txt
|
|
359
|
+
echo "$line2" >> output.txt
|
|
360
|
+
echo "Footer" >> output.txt
|
|
361
|
+
```
|
|
362
|
+
|
|
363
|
+
**After:**
|
|
364
|
+
|
|
365
|
+
```bash
|
|
366
|
+
{
|
|
367
|
+
printf '%s\n' "Header"
|
|
368
|
+
printf '%s\n' "$line1" "$line2"
|
|
369
|
+
printf '%s\n' "Footer"
|
|
370
|
+
} > output.txt
|
|
371
|
+
```
|
|
372
|
+
|
|
373
|
+
**Why:** Grouping with `{ }` redirects once for the entire block. `printf '%s\n'` can print multiple arguments in a single call, reducing the number of builtin invocations. Using `printf` instead of `echo` also avoids portability issues with escape sequences.
|