claude-evolve 1.7.20 → 1.7.22
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- package/bin/claude-evolve-ideate +74 -51
- package/lib/csv_fixer.py +44 -5
- package/package.json +1 -1
package/bin/claude-evolve-ideate
CHANGED
@@ -95,66 +95,79 @@ call_ai_for_ideation() {
     gen_num=1
   fi
 
-  #
-
+  # Make a backup of the pre-populated temp CSV (which includes stub rows from caller)
+  # This preserves the stub rows that the caller added
+  local temp_csv_backup="${temp_csv_file}.backup"
   if [[ -f "$temp_csv_file" ]]; then
-
+    cp "$temp_csv_file" "$temp_csv_backup"
   else
-
+    echo "[ERROR] Temp CSV file not found at start: $temp_csv_file" >&2
+    return 1
   fi
-
-
+
+  # Get the current row count before any modifications (from the pre-populated file with stubs)
+  local original_csv_count
+  original_csv_count=$(grep -v '^[[:space:]]*$' "$temp_csv_file" | tail -n +2 | wc -l | tr -d '[:space:]')
+
+  echo "[DEBUG] Pre-populated temp CSV has $original_csv_count rows (includes stub rows with placeholders)" >&2
+
   # Get models for ideation
   local model_list
   model_list=$(get_models_for_command "ideate")
   local models=()
   read -ra models <<< "$model_list"
-
+
   if [[ ${#models[@]} -eq 0 ]]; then
     echo "[ERROR] No models configured for ideation" >&2
+    rm -f "$temp_csv_backup"
     return 1
   fi
-
+
   # Calculate starting index for round-robin
   local num_models=${#models[@]}
   local start_index=$((gen_num % num_models))
-
+
   # Create ordered list based on round-robin
   local ordered_models=()
   for ((i=0; i<num_models; i++)); do
     local idx=$(((start_index + i) % num_models))
    ordered_models+=("${models[$idx]}")
   done
-
+
   echo "[AI] Model order for ideate (round-robin): ${ordered_models[*]}" >&2
-
+
   # Try each model until CSV changes
   for model in "${ordered_models[@]}"; do
     echo "[AI] Attempting ideate with $model" >&2
 
-    # Restore temp CSV before each attempt (in case previous model corrupted it)
-    # This
-
-    cp "$FULL_CSV_PATH" "$temp_csv_file"
-    # Recapture original count in case it changed
-    original_csv_count=$(grep -v '^[[:space:]]*$' "$temp_csv_file" | tail -n +2 | wc -l)
-    fi
+    # Restore temp CSV from backup before each attempt (in case previous model corrupted it)
+    # This preserves the stub rows that the caller pre-populated
+    cp "$temp_csv_backup" "$temp_csv_file"
 
     # Call the model directly
     local ai_output
     ai_output=$(call_ai_model_configured "$model" "$prompt")
     local ai_exit_code=$?
 
-    # Check if the file was modified
+    # Check if the file was modified correctly
    if [[ -f "$temp_csv_file" ]]; then
       local new_csv_count
-      new_csv_count=$(grep -v '^[[:space:]]*$' "$temp_csv_file" | tail -n +2 | wc -l)
+      new_csv_count=$(grep -v '^[[:space:]]*$' "$temp_csv_file" | tail -n +2 | wc -l | tr -d '[:space:]')
       local added_count=$((new_csv_count - original_csv_count))
 
-
-
-
-
+      echo "[DEBUG] After $model: original=$original_csv_count, new=$new_csv_count, added=$added_count" >&2
+
+      # Check if row count is correct (should be same since we're editing stubs, not adding)
+      if [[ $new_csv_count -eq $original_csv_count ]]; then
+        # Count remaining placeholders - there should be none if AI did its job
+        local placeholder_count
+        placeholder_count=$(grep -c "PLACEHOLDER" "$temp_csv_file" 2>/dev/null || echo "0")
+        # Strip whitespace and ensure we have a clean integer
+        placeholder_count=$(echo "$placeholder_count" | tr -d '[:space:]')
+        placeholder_count=${placeholder_count:-0}
+
+        if [[ $placeholder_count -eq 0 ]]; then
+          echo "[INFO] CSV modified by $model: filled $expected_count placeholder rows ✓" >&2
 
           # Post-process to ensure all description fields are quoted
           local fixed_csv_file="${temp_csv_file}.fixed"
@@ -167,13 +180,22 @@ call_ai_for_ideation() {
            echo "[WARN] CSV format validation failed, using original" >&2
          fi
 
+          # Clean up backup file
+          rm -f "$temp_csv_backup"
+
          # Echo the successful model name for caller to capture
          echo "$model"
          return 0
        else
-          echo "[WARN] $model
-          # Continue to next model
+          echo "[WARN] $model left $placeholder_count placeholders unfilled - trying next model" >&2
+          # Continue to next model
        fi
+      elif [[ $added_count -lt 0 ]]; then
+        echo "[WARN] $model deleted rows ($added_count) - trying next model" >&2
+        # Continue to next model
+      elif [[ $added_count -gt 0 ]]; then
+        echo "[WARN] $model added extra rows ($added_count) instead of editing stubs - trying next model" >&2
+        # Continue to next model
      else
        echo "[INFO] CSV unchanged after $model (exit code: $ai_exit_code)" >&2
        # Log last few lines of AI output to help debug why it succeeded but didn't change the file
@@ -191,6 +213,7 @@ call_ai_for_ideation() {
   done
 
   # All models tried, none changed the file
+  rm -f "$temp_csv_backup"
   echo "[ERROR] All AI models failed to generate ideas" >&2
   return 1
 }
@@ -357,11 +380,11 @@ validate_direct_csv_modification() {
   # We need to track this before the AI runs by reading from the beginning state
   # First, get a fresh count from the current main CSV (which reflects any previous operations in this session)
   local current_original_count
-  current_original_count=$(grep -v '^[[:space:]]*$' "$FULL_CSV_PATH" | tail -n +2 | wc -l)
+  current_original_count=$(grep -v '^[[:space:]]*$' "$FULL_CSV_PATH" | tail -n +2 | wc -l | tr -d '[:space:]')
 
   # Count data rows in the modified temp CSV
   local new_count
-  new_count=$(grep -v '^[[:space:]]*$' "$temp_csv" | tail -n +2 | wc -l)
+  new_count=$(grep -v '^[[:space:]]*$' "$temp_csv" | tail -n +2 | wc -l | tr -d '[:space:]')
 
 
   # Check if AI overwrote the file instead of appending
@@ -422,7 +445,7 @@ validate_direct_csv_modification() {
   fi
 
   # Get just the new entries (skip header and existing entries)
-  local original_line_count=$(wc -l < "$FULL_CSV_PATH")
+  local original_line_count=$(wc -l < "$FULL_CSV_PATH" | tr -d '[:space:]')
 
   # Append only the new lines from temp CSV to the main CSV
   tail -n +$((original_line_count + 1)) "$temp_csv" >> "$FULL_CSV_PATH"
@@ -494,9 +517,9 @@ validate_and_apply_csv_modification_old() {
 
   # Validate the modified CSV has more entries than original
   local original_count
-  original_count=$(wc -l < "$FULL_CSV_PATH")
+  original_count=$(wc -l < "$FULL_CSV_PATH" | tr -d '[:space:]')
   local new_count
-  new_count=$(wc -l < "$temp_csv")
+  new_count=$(wc -l < "$temp_csv" | tr -d '[:space:]')
 
 
   if [[ $new_count -le $original_count ]]; then
@@ -523,7 +546,7 @@ validate_and_apply_csv_modification_old() {
   fi
 
   # Get just the new entries (skip header and existing entries)
-  local original_line_count=$(wc -l < "$FULL_CSV_PATH")
+  local original_line_count=$(wc -l < "$FULL_CSV_PATH" | tr -d '[:space:]')
 
   # Append only the new lines from temp CSV to the main CSV
   tail -n +$((original_line_count + 1)) "$temp_csv" >> "$FULL_CSV_PATH"
@@ -645,9 +668,9 @@ IMPORTANT: Output the complete modified CSV file. Do not add any explanation or
 
   # Validate the modified CSV has more entries than original
   local original_count
-  original_count=$(wc -l < "$FULL_CSV_PATH")
+  original_count=$(wc -l < "$FULL_CSV_PATH" | tr -d '[:space:]')
   local new_count
-  new_count=$(wc -l < "$temp_csv")
+  new_count=$(wc -l < "$temp_csv" | tr -d '[:space:]')
 
 
   if [[ $new_count -le $original_count ]]; then
@@ -676,7 +699,7 @@ IMPORTANT: Output the complete modified CSV file. Do not add any explanation or
   fi
 
   # Get just the new entries (skip header and existing entries)
-  local original_line_count=$(wc -l < "$FULL_CSV_PATH")
+  local original_line_count=$(wc -l < "$FULL_CSV_PATH" | tr -d '[:space:]')
 
   # Append only the new lines from temp CSV to the main CSV
   tail -n +$((original_line_count + 1)) "$temp_csv" >> "$FULL_CSV_PATH"
@@ -987,7 +1010,7 @@ generate_novel_ideas_direct() {
 
   # Count total lines in temp CSV (including header)
   local total_lines
-  total_lines=$(wc -l < "$temp_csv")
+  total_lines=$(wc -l < "$temp_csv" | tr -d '[:space:]')
   local read_offset=$((total_lines - 25))
   if [[ $read_offset -lt 1 ]]; then
     read_offset=1
@@ -1108,6 +1131,10 @@ generate_hill_climbing_direct() {
   local temp_csv="$FULL_EVOLUTION_DIR/temp-csv-$$.csv"
   cp "$FULL_CSV_PATH" "$temp_csv"
 
+  # Extract just the IDs from top performers for clarity (needed before pre-populating)
+  local valid_parent_ids
+  valid_parent_ids=$(echo "$top_performers" | cut -d',' -f1 | paste -sd ',' -)
+
   # Pre-populate the CSV with stub rows containing the correct IDs and parent IDs
   echo "[INFO] Pre-populating CSV with stub rows: $required_ids_str"
   # Use first parent as default for stubs (AI will adjust if needed)
@@ -1121,7 +1148,7 @@ generate_hill_climbing_direct() {
 
   # Count total lines in temp CSV (including header)
   local total_lines
-  total_lines=$(wc -l < "$temp_csv")
+  total_lines=$(wc -l < "$temp_csv" | tr -d '[:space:]')
   local read_offset=$((total_lines - 25))
   if [[ $read_offset -lt 1 ]]; then
     read_offset=1
@@ -1130,10 +1157,6 @@ generate_hill_climbing_direct() {
   # Get existing Python files for this generation to avoid ID collisions
   local existing_py_files=$(get_existing_py_files_for_generation "$CURRENT_GENERATION")
 
-  # Extract just the IDs from top performers for clarity
-  local valid_parent_ids
-  valid_parent_ids=$(echo "$top_performers" | cut -d',' -f1 | paste -sd ',' -)
-
   # Use relative paths and change to evolution directory so AI can access files
   local temp_csv_basename=$(basename "$temp_csv")
 
@@ -1238,6 +1261,10 @@ generate_structural_mutation_direct() {
   local temp_csv="$FULL_EVOLUTION_DIR/temp-csv-$$.csv"
   cp "$FULL_CSV_PATH" "$temp_csv"
 
+  # Extract just the IDs from top performers for clarity (needed before pre-populating)
+  local valid_parent_ids
+  valid_parent_ids=$(echo "$top_performers" | cut -d',' -f1 | paste -sd ',' -)
+
   # Pre-populate the CSV with stub rows
   echo "[INFO] Pre-populating CSV with stub rows: $required_ids_str"
   local first_parent_id
@@ -1250,7 +1277,7 @@ generate_structural_mutation_direct() {
 
   # Count total lines in temp CSV (including header)
   local total_lines
-  total_lines=$(wc -l < "$temp_csv")
+  total_lines=$(wc -l < "$temp_csv" | tr -d '[:space:]')
   local read_offset=$((total_lines - 25))
   if [[ $read_offset -lt 1 ]]; then
     read_offset=1
@@ -1259,10 +1286,6 @@ generate_structural_mutation_direct() {
   # Get existing Python files for this generation to avoid ID collisions
   local existing_py_files=$(get_existing_py_files_for_generation "$CURRENT_GENERATION")
 
-  # Extract just the IDs from top performers for clarity
-  local valid_parent_ids
-  valid_parent_ids=$(echo "$top_performers" | cut -d',' -f1 | paste -sd ',' -)
-
   # Use relative paths and change to evolution directory so AI can access files
   local temp_csv_basename=$(basename "$temp_csv")
 
@@ -1358,6 +1381,10 @@ generate_crossover_direct() {
   local temp_csv="$FULL_EVOLUTION_DIR/temp-csv-$$.csv"
   cp "$FULL_CSV_PATH" "$temp_csv"
 
+  # Extract just the IDs from top performers for clarity (needed before pre-populating)
+  local valid_parent_ids
+  valid_parent_ids=$(echo "$top_performers" | cut -d',' -f1 | paste -sd ',' -)
+
   # Pre-populate the CSV with stub rows
   echo "[INFO] Pre-populating CSV with stub rows: $required_ids_str"
   local first_parent_id
@@ -1370,7 +1397,7 @@ generate_crossover_direct() {
 
   # Count total lines in temp CSV (including header)
   local total_lines
-  total_lines=$(wc -l < "$temp_csv")
+  total_lines=$(wc -l < "$temp_csv" | tr -d '[:space:]')
   local read_offset=$((total_lines - 25))
   if [[ $read_offset -lt 1 ]]; then
     read_offset=1
@@ -1379,10 +1406,6 @@ generate_crossover_direct() {
   # Get existing Python files for this generation to avoid ID collisions
   local existing_py_files=$(get_existing_py_files_for_generation "$CURRENT_GENERATION")
 
-  # Extract just the IDs from top performers for clarity
-  local valid_parent_ids
-  valid_parent_ids=$(echo "$top_performers" | cut -d',' -f1 | paste -sd ',' -)
-
   # Use relative paths and change to evolution directory so AI can access files
   local temp_csv_basename=$(basename "$temp_csv")
 
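Note: the net effect of the call_ai_for_ideation changes is a stricter acceptance check after each model attempt. The temp CSV is restored from a backup of the pre-populated stub file before every attempt, and a result is accepted only when the data-row count is unchanged and no PLACEHOLDER markers remain. The Python sketch below mirrors that check for illustration only; the helper function and its name are hypothetical and are not part of the package.

import csv

# Illustration of the acceptance check added above; not package code.
def attempt_accepted(temp_csv_path, original_row_count):
    with open(temp_csv_path, newline="") as f:
        # drop blank rows, like grep -v '^[[:space:]]*$' in the script
        rows = [r for r in csv.reader(f) if any(field.strip() for field in r)]
    data_rows = rows[1:]  # skip the header, like tail -n +2
    if len(data_rows) != original_row_count:
        return False  # the model added or deleted rows instead of editing stubs
    # reject the attempt if any stub row still contains the PLACEHOLDER marker
    return not any("PLACEHOLDER" in field for row in data_rows for field in row)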
package/lib/csv_fixer.py
CHANGED
@@ -9,6 +9,31 @@ import csv
 import sys
 import re
 
+def clean_candidate_id(candidate_id):
+    """
+    Clean and normalize a candidate ID.
+    Returns (cleaned_id, was_modified)
+    """
+    if not candidate_id or candidate_id == "id":
+        return candidate_id, False
+
+    original = candidate_id
+    cleaned = candidate_id
+
+    # Strip leading/trailing whitespace
+    cleaned = cleaned.strip()
+
+    # Remove any internal spaces (e.g., "gen01 -001" -> "gen01-001")
+    cleaned = re.sub(r'\s+', '', cleaned)
+
+    # Remove pipe characters and anything before them (line number artifacts)
+    if '|' in cleaned:
+        # Extract the part after the last pipe
+        parts = cleaned.split('|')
+        cleaned = parts[-1].strip()
+
+    return cleaned, (cleaned != original)
+
 def is_valid_candidate_id(candidate_id):
     """
     Check if a candidate ID is valid.
@@ -25,7 +50,7 @@ def is_valid_candidate_id(candidate_id):
     if not candidate_id or candidate_id == "id":
         return True # Header row
 
-    # Reject IDs containing pipe characters
+    # Reject IDs still containing pipe characters after cleaning
     if '|' in candidate_id:
         return False
 
@@ -46,13 +71,14 @@ def fix_csv_format(input_file, output_file):
     """
     Read a CSV file and ensure all fields are properly quoted.
     The csv module handles quoting automatically based on content.
-    Also
+    Also cleans and validates candidate IDs, filtering out invalid rows.
     """
     with open(input_file, 'r') as infile:
         reader = csv.reader(infile)
         rows = list(reader)
 
     rejected_count = 0
+    cleaned_count = 0
     filtered_rows = []
 
     for i, row in enumerate(rows):
@@ -67,14 +93,27 @@ def fix_csv_format(input_file, output_file):
 
         candidate_id = row[0] if len(row) > 0 else ""
 
-        #
-
+        # Clean the candidate ID
+        cleaned_id, was_modified = clean_candidate_id(candidate_id)
+
+        if was_modified:
+            cleaned_count += 1
+            print(f"[INFO] Cleaned ID: '{candidate_id}' -> '{cleaned_id}'", file=sys.stderr)
+        row[0] = cleaned_id
+
+        # Check if candidate ID is valid after cleaning
+        if not is_valid_candidate_id(cleaned_id):
             rejected_count += 1
-            print(f"[WARN] Rejecting corrupted record with invalid ID: {candidate_id}", file=sys.stderr)
+            print(f"[WARN] Rejecting corrupted record with invalid ID: {candidate_id} (cleaned: {cleaned_id})", file=sys.stderr)
             continue
 
+        # Trim whitespace from all other fields
+        row = [field.strip() if isinstance(field, str) else field for field in row]
+
         filtered_rows.append(row)
 
+    if cleaned_count > 0:
+        print(f"[INFO] Cleaned {cleaned_count} IDs (removed spaces, pipes, etc.)", file=sys.stderr)
     if rejected_count > 0:
         print(f"[INFO] Filtered out {rejected_count} corrupted records", file=sys.stderr)
 
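Note: clean_candidate_id runs before is_valid_candidate_id, so IDs that only carry whitespace or line-number artifacts are repaired rather than rejected. A minimal usage sketch, assuming lib/csv_fixer.py is importable as csv_fixer (the import path and the sample IDs below are illustrative assumptions, not package behavior beyond what the function itself defines):

from csv_fixer import clean_candidate_id

samples = [
    "gen01-001",       # already clean: returned unchanged, was_modified is False
    "  gen01 -001 ",   # stray whitespace is stripped and collapsed to "gen01-001"
    "42 | gen01-003",  # line-number artifact before a pipe becomes "gen01-003"
]

for raw in samples:
    cleaned, was_modified = clean_candidate_id(raw)
    print(f"{raw!r} -> {cleaned!r} (modified={was_modified})")

Rows whose IDs still fail is_valid_candidate_id after this cleaning are filtered out by fix_csv_format, as shown in the last hunk above.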