redis-benchmarks-specification 0.2.29__py3-none-any.whl → 0.2.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of redis-benchmarks-specification might be problematic. Click here for more details.

@@ -16,6 +16,7 @@ import os
16
16
  from tqdm import tqdm
17
17
  import argparse
18
18
  import numpy as np
19
+ from concurrent.futures import ThreadPoolExecutor
19
20
 
20
21
  from io import StringIO
21
22
  import sys
@@ -1182,6 +1183,388 @@ def get_by_strings(
1182
1183
  return baseline_str, by_str_baseline, comparison_str, by_str_comparison
1183
1184
 
1184
1185
 
1186
def _classify_percentage_change(
    percentage_change,
    metric_mode,
    waterline,
    noise_waterline,
    simplify_table,
    regression_str,
    improvement_str,
):
    """Classify a percentage change as a regression, an improvement, or neither.

    Returns a ``(detected_regression, detected_improvement, note_suffix)``
    tuple, where ``note_suffix`` is the text to append to the table note
    (possibly an empty string).
    """
    # Fold both metric modes into one signed quantity: positive means "worse".
    if metric_mode == "higher-better":
        degradation = -percentage_change
    else:
        degradation = percentage_change
    detailed_notes = simplify_table is False

    if degradation > 0.0:
        # The regression threshold is inclusive (>= waterline).
        if degradation >= waterline:
            return True, False, f" {regression_str}"
        if not detailed_notes:
            return False, False, ""
        if degradation > noise_waterline:
            return False, False, f" potential {regression_str}"
        return False, False, " No Change"

    if degradation < 0.0:
        gain = -degradation
        # The improvement threshold is exclusive (> waterline).
        if gain > waterline:
            return False, True, f" {improvement_str}"
        if not detailed_notes:
            return False, False, ""
        if gain > noise_waterline:
            return False, False, f" potential {improvement_str}"
        return False, False, " No Change"

    # Exactly zero (or NaN): nothing detected and nothing added to the note.
    return False, False, ""


def process_single_test_comparison(
    test_name,
    tests_with_config,
    original_metric_mode,
    baseline_str,
    comparison_str,
    by_str_baseline,
    by_str_comparison,
    metric_name,
    test_filter,
    baseline_github_repo,
    comparison_github_repo,
    tf_triggering_env_baseline,
    tf_triggering_env_comparison,
    extra_filters,
    baseline_deployment_name,
    comparison_deployment_name,
    baseline_github_org,
    comparison_github_org,
    running_platform_baseline,
    running_platform_comparison,
    rts,
    from_ts_ms,
    to_ts_ms,
    last_n_baseline,
    last_n_comparison,
    verbose,
    regressions_percent_lower_limit,
    simplify_table,
    regression_str,
    improvement_str,
    progress,
):
    """
    Process comparison analysis for a single test.

    Builds the baseline and comparison time-series filters for ``test_name``,
    queries ``rts`` for the matching series and their datapoints, and computes
    the percentage change between both sides.

    The function does not touch any caller-side accumulator: every outcome is
    reported through the returned dictionary so that the caller can run many
    tests concurrently and merge the results afterwards.

    Args:
        test_name: Name of the test being compared.
        tests_with_config: Mapping of test name to its spec; the optional
            "tested-groups" and "tested-commands" entries are read from it.
        original_metric_mode: "higher-better", or any other value for
            lower-better metrics.
        baseline_str / comparison_str: Value filtered on for each side.
        by_str_baseline / by_str_comparison: Label name paired with the value
            above; when it does not contain "hash" a "hash==" filter is added.
        metric_name, test_filter, *_github_repo, tf_triggering_env_*,
        extra_filters, *_deployment_name, *_github_org, running_platform_*:
            Pieces used to build the time-series query filters. Empty strings
            (and ``None`` for the running platform) are skipped.
        rts: Client exposing ``ts().queryindex`` and ``ts().revrange``.
        from_ts_ms / to_ts_ms: Range passed to ``revrange``.
        last_n_baseline / last_n_comparison: Forwarded to
            ``get_v_pct_change_and_largest_var``.
        verbose: Enables extra logging.
        regressions_percent_lower_limit: Threshold (in percent) above which a
            change is reported as a regression or improvement.
        simplify_table: When ``False`` the note also carries "potential ..."
            and "No Change" annotations.
        regression_str / improvement_str: Labels added to the note.
        progress: Object whose ``update()`` is called once per test.

    Returns:
        dict with the keys ``skip_test``, ``no_datapoints_baseline``,
        ``no_datapoints_comparison``, ``no_datapoints_both``,
        ``baseline_only``, ``comparison_only``, ``detected_regression``,
        ``detected_improvement``, ``unstable``, ``should_add_line``, ``line``,
        ``percentage_change``, ``tested_groups``, ``tested_commands`` and
        ``boxplot_data``.
    """
    tested_groups = []
    tested_commands = []
    if test_name in tests_with_config:
        test_spec = tests_with_config[test_name]
        if "tested-groups" in test_spec:
            tested_groups = test_spec["tested-groups"]
        if "tested-commands" in test_spec:
            tested_commands = test_spec["tested-commands"]
    else:
        logging.error(f"Test does not contain spec info: {test_name}")

    metric_mode = original_metric_mode
    compare_version = "main"
    github_link = "https://github.com/redis/redis-benchmarks-specification/blob"
    test_path = f"redis_benchmarks_specification/test-suites/{test_name}.yml"
    test_link = f"[{test_name}]({github_link}/{compare_version}/{test_path})"
    multi_value_baseline = check_multi_value_filter(baseline_str)
    multi_value_comparison = check_multi_value_filter(comparison_str)

    filters_baseline = [
        f"metric={metric_name}",
        f"{test_filter}={test_name}",
        f"github_repo={baseline_github_repo}",
        f"triggering_env={tf_triggering_env_baseline}",
    ]
    if extra_filters != "":
        filters_baseline.append(extra_filters)
    if baseline_str != "":
        filters_baseline.append(f"{by_str_baseline}={baseline_str}")
    if baseline_deployment_name != "":
        filters_baseline.append(f"deployment_name={baseline_deployment_name}")
    if baseline_github_org != "":
        filters_baseline.append(f"github_org={baseline_github_org}")
    if running_platform_baseline is not None and running_platform_baseline != "":
        filters_baseline.append(f"running_platform={running_platform_baseline}")

    filters_comparison = [
        f"metric={metric_name}",
        f"{test_filter}={test_name}",
        f"github_repo={comparison_github_repo}",
        f"triggering_env={tf_triggering_env_comparison}",
    ]
    if comparison_str != "":
        filters_comparison.append(f"{by_str_comparison}={comparison_str}")
    if comparison_deployment_name != "":
        filters_comparison.append(f"deployment_name={comparison_deployment_name}")
    if extra_filters != "":
        filters_comparison.append(extra_filters)
    if comparison_github_org != "":
        filters_comparison.append(f"github_org={comparison_github_org}")
    if "hash" not in by_str_baseline:
        filters_baseline.append("hash==")
    if "hash" not in by_str_comparison:
        filters_comparison.append("hash==")
    if running_platform_comparison is not None and running_platform_comparison != "":
        filters_comparison.append(f"running_platform={running_platform_comparison}")

    baseline_timeseries = rts.ts().queryindex(filters_baseline)
    comparison_timeseries = rts.ts().queryindex(filters_comparison)

    # avoiding target time-series
    comparison_timeseries = [x for x in comparison_timeseries if "target" not in x]
    baseline_timeseries = [x for x in baseline_timeseries if "target" not in x]
    # NOTE(review): from_rts_to_regression_table calls this function from
    # ThreadPoolExecutor workers; confirm `progress` tolerates concurrent update().
    progress.update()
    if verbose:
        logging.info(
            "Baseline timeseries for {}: {}. test={}".format(
                baseline_str, len(baseline_timeseries), test_name
            )
        )
        logging.info(
            "Comparison timeseries for {}: {}. test={}".format(
                comparison_str, len(comparison_timeseries), test_name
            )
        )
    if len(baseline_timeseries) > 1 and multi_value_baseline is False:
        baseline_timeseries = get_only_Totals(baseline_timeseries)

    # Everything the caller needs to accumulate is reported through this dict.
    result = {
        "skip_test": False,
        "no_datapoints_baseline": False,
        "no_datapoints_comparison": False,
        "no_datapoints_both": False,
        "baseline_only": False,
        "comparison_only": False,
        "detected_regression": False,
        "detected_improvement": False,
        "unstable": False,
        "should_add_line": False,
        "line": None,
        "percentage_change": 0.0,
        "tested_groups": tested_groups,
        "tested_commands": tested_commands,
        "boxplot_data": None,
    }

    if len(baseline_timeseries) == 0:
        logging.warning(
            f"No datapoints for test={test_name} for baseline timeseries {baseline_timeseries}"
        )
        result["no_datapoints_baseline"] = True
        result["no_datapoints_both"] = True

    if len(comparison_timeseries) == 0:
        logging.warning(
            f"No datapoints for test={test_name} for comparison timeseries {comparison_timeseries}"
        )
        result["no_datapoints_comparison"] = True
        result["no_datapoints_both"] = True

    if len(baseline_timeseries) != 1 and multi_value_baseline is False:
        if verbose:
            logging.warning(
                "Skipping this test given the value of timeseries !=1. Baseline timeseries {}".format(
                    len(baseline_timeseries)
                )
            )
            if len(baseline_timeseries) > 1:
                logging.warning(
                    "\t\tTime-series: {}".format(", ".join(baseline_timeseries))
                )
        result["skip_test"] = True
        return result

    if len(comparison_timeseries) > 1 and multi_value_comparison is False:
        comparison_timeseries = get_only_Totals(comparison_timeseries)
    if len(comparison_timeseries) != 1 and multi_value_comparison is False:
        if verbose:
            logging.warning(
                "Comparison timeseries {}".format(len(comparison_timeseries))
            )
        result["skip_test"] = True
        return result

    baseline_v = "N/A"
    comparison_v = "N/A"
    baseline_values = []
    baseline_datapoints = []
    comparison_values = []
    comparison_datapoints = []
    percentage_change = 0.0
    baseline_v_str = "N/A"
    comparison_v_str = "N/A"
    largest_variance = 0
    baseline_pct_change = "N/A"
    comparison_pct_change = "N/A"
    note = ""
    # Assigned before the try block so it is always bound, even when fetching
    # the datapoints fails part-way through.
    waterline = regressions_percent_lower_limit
    noise_waterline = 3

    try:
        for ts_name_baseline in baseline_timeseries:
            baseline_datapoints.extend(
                rts.ts().revrange(ts_name_baseline, from_ts_ms, to_ts_ms)
            )
        (
            baseline_pct_change,
            baseline_v,
            largest_variance,
        ) = get_v_pct_change_and_largest_var(
            baseline_datapoints,
            baseline_pct_change,
            baseline_v,
            baseline_values,
            largest_variance,
            last_n_baseline,
            verbose,
        )
        for ts_name_comparison in comparison_timeseries:
            comparison_datapoints.extend(
                rts.ts().revrange(ts_name_comparison, from_ts_ms, to_ts_ms)
            )
        (
            comparison_pct_change,
            comparison_v,
            largest_variance,
        ) = get_v_pct_change_and_largest_var(
            comparison_datapoints,
            comparison_pct_change,
            comparison_v,
            comparison_values,
            largest_variance,
            last_n_comparison,
            verbose,
        )
    except redis.exceptions.ResponseError as e:
        logging.error(f"Detected a redis.exceptions.ResponseError. {e}")
    except ZeroDivisionError as e:
        logging.error(f"Detected a ZeroDivisionError. {e}")

    if baseline_v != "N/A" and comparison_v == "N/A":
        logging.warning(
            f"Baseline contains datapoints but comparison not for test: {test_name}"
        )
        result["baseline_only"] = True
    if comparison_v != "N/A" and baseline_v == "N/A":
        logging.warning(
            f"Comparison contains datapoints but baseline not for test: {test_name}"
        )
        result["comparison_only"] = True

    if (
        baseline_v != "N/A"
        and comparison_pct_change != "N/A"
        and comparison_v != "N/A"
        and baseline_pct_change != "N/A"
    ):
        if comparison_pct_change > 10.0 or baseline_pct_change > 10.0:
            note = "UNSTABLE (very high variance)"
            result["unstable"] = True

        baseline_v_str = prepare_value_str(
            baseline_pct_change,
            baseline_v,
            baseline_values,
            simplify_table,
            metric_name,
        )
        comparison_v_str = prepare_value_str(
            comparison_pct_change,
            comparison_v,
            comparison_values,
            simplify_table,
            metric_name,
        )

        # A zero baseline used to raise here, outside any handler, which
        # aborted the whole (threaded) comparison run. Log it and keep the
        # neutral 0.0 change instead.
        try:
            if metric_mode == "higher-better":
                percentage_change = (
                    float(comparison_v) / float(baseline_v) - 1
                ) * 100.0
            else:
                # lower-better
                percentage_change = (
                    -(float(baseline_v) - float(comparison_v)) / float(baseline_v)
                ) * 100.0
        except ZeroDivisionError:
            logging.error(
                f"Baseline value is zero for test {test_name}; cannot compute a percentage change."
            )
        else:
            # Collect data for box plot
            result["boxplot_data"] = (test_name, percentage_change)
    else:
        logging.warning(
            f"Missing data for test {test_name}. baseline_v={baseline_v} (pct_change={baseline_pct_change}), comparison_v={comparison_v} (pct_change={comparison_pct_change}) "
        )

    result["percentage_change"] = percentage_change

    if baseline_v != "N/A" or comparison_v != "N/A":
        (
            detected_regression,
            detected_improvement,
            note_suffix,
        ) = _classify_percentage_change(
            percentage_change,
            metric_mode,
            waterline,
            noise_waterline,
            simplify_table,
            regression_str,
            improvement_str,
        )
        note = note + note_suffix

        result["detected_regression"] = detected_regression
        result["detected_improvement"] = detected_improvement
        result["line"] = get_line(
            baseline_v_str,
            comparison_v_str,
            note,
            percentage_change,
            test_link,
        )
    else:
        logging.warning(
            f"There were no datapoints both for baseline and comparison for test: {test_name}"
        )
        result["no_datapoints_both"] = True

    return result
1566
+
1567
+
1185
1568
  def from_rts_to_regression_table(
1186
1569
  baseline_deployment_name,
1187
1570
  comparison_deployment_name,
@@ -1245,353 +1628,118 @@ def from_rts_to_regression_table(
1245
1628
 
1246
1629
  # Data collection for box plot
1247
1630
  boxplot_data = []
1248
- for test_name in test_names:
1249
- tested_groups = []
1250
- tested_commands = []
1251
- if test_name in tests_with_config:
1252
- test_spec = tests_with_config[test_name]
1253
- if "tested-groups" in test_spec:
1254
- tested_groups = test_spec["tested-groups"]
1255
- if "tested-commands" in test_spec:
1256
- tested_commands = test_spec["tested-commands"]
1257
- else:
1258
- logging.error(f"Test does not contain spec info: {test_name}")
1259
- metric_mode = original_metric_mode
1260
- compare_version = "main"
1261
- # GE
1262
- github_link = "https://github.com/redis/redis-benchmarks-specification/blob"
1263
- test_path = f"redis_benchmarks_specification/test-suites/{test_name}.yml"
1264
- test_link = f"[{test_name}]({github_link}/{compare_version}/{test_path})"
1265
- multi_value_baseline = check_multi_value_filter(baseline_str)
1266
- multi_value_comparison = check_multi_value_filter(comparison_str)
1267
-
1268
- filters_baseline = [
1269
- "metric={}".format(metric_name),
1270
- "{}={}".format(test_filter, test_name),
1271
- "github_repo={}".format(baseline_github_repo),
1272
- "triggering_env={}".format(tf_triggering_env_baseline),
1273
- ]
1274
- if extra_filters != "":
1275
- filters_baseline.append(extra_filters)
1276
- if baseline_str != "":
1277
- filters_baseline.append("{}={}".format(by_str_baseline, baseline_str))
1278
- if baseline_deployment_name != "":
1279
- filters_baseline.append(
1280
- "deployment_name={}".format(baseline_deployment_name)
1281
- )
1282
- if baseline_github_org != "":
1283
- filters_baseline.append(f"github_org={baseline_github_org}")
1284
- if running_platform_baseline is not None and running_platform_baseline != "":
1285
- filters_baseline.append(
1286
- "running_platform={}".format(running_platform_baseline)
1287
- )
1288
- filters_comparison = [
1289
- "metric={}".format(metric_name),
1290
- "{}={}".format(test_filter, test_name),
1291
- "github_repo={}".format(comparison_github_repo),
1292
- "triggering_env={}".format(tf_triggering_env_comparison),
1293
- ]
1294
- if comparison_str != "":
1295
- filters_comparison.append("{}={}".format(by_str_comparison, comparison_str))
1296
- if comparison_deployment_name != "":
1297
- filters_comparison.append(
1298
- "deployment_name={}".format(comparison_deployment_name)
1299
- )
1300
- if extra_filters != "":
1301
- filters_comparison.append(extra_filters)
1302
- if comparison_github_org != "":
1303
- filters_comparison.append(f"github_org={comparison_github_org}")
1304
- if "hash" not in by_str_baseline:
1305
- filters_baseline.append("hash==")
1306
- if "hash" not in by_str_comparison:
1307
- filters_comparison.append("hash==")
1308
- if (
1309
- running_platform_comparison is not None
1310
- and running_platform_comparison != ""
1311
- ):
1312
- filters_comparison.append(
1313
- "running_platform={}".format(running_platform_comparison)
1314
- )
1315
- baseline_timeseries = rts.ts().queryindex(filters_baseline)
1316
- comparison_timeseries = rts.ts().queryindex(filters_comparison)
1317
1631
 
1318
- # avoiding target time-series
1319
- comparison_timeseries = [x for x in comparison_timeseries if "target" not in x]
1320
- baseline_timeseries = [x for x in baseline_timeseries if "target" not in x]
1321
- progress.update()
1322
- if verbose:
1323
- logging.info(
1324
- "Baseline timeseries for {}: {}. test={}".format(
1325
- baseline_str, len(baseline_timeseries), test_name
1326
- )
1327
- )
1328
- logging.info(
1329
- "Comparison timeseries for {}: {}. test={}".format(
1330
- comparison_str, len(comparison_timeseries), test_name
1331
- )
1332
- )
1333
- if len(baseline_timeseries) > 1 and multi_value_baseline is False:
1334
- baseline_timeseries = get_only_Totals(baseline_timeseries)
1632
+ # First loop: Collect all test results using parallel processing
1633
+ test_results = []
1335
1634
 
1336
- if len(baseline_timeseries) == 0:
1337
- logging.warning(
1338
- f"No datapoints for test={test_name} for baseline timeseries {baseline_timeseries}"
1339
- )
1635
+ def process_test_wrapper(test_name):
1636
+ """Wrapper function to process a single test and return test_name with result"""
1637
+ result = process_single_test_comparison(
1638
+ test_name,
1639
+ tests_with_config,
1640
+ original_metric_mode,
1641
+ baseline_str,
1642
+ comparison_str,
1643
+ by_str_baseline,
1644
+ by_str_comparison,
1645
+ metric_name,
1646
+ test_filter,
1647
+ baseline_github_repo,
1648
+ comparison_github_repo,
1649
+ tf_triggering_env_baseline,
1650
+ tf_triggering_env_comparison,
1651
+ extra_filters,
1652
+ baseline_deployment_name,
1653
+ comparison_deployment_name,
1654
+ baseline_github_org,
1655
+ comparison_github_org,
1656
+ running_platform_baseline,
1657
+ running_platform_comparison,
1658
+ rts,
1659
+ from_ts_ms,
1660
+ to_ts_ms,
1661
+ last_n_baseline,
1662
+ last_n_comparison,
1663
+ verbose,
1664
+ regressions_percent_lower_limit,
1665
+ simplify_table,
1666
+ regression_str,
1667
+ improvement_str,
1668
+ progress,
1669
+ )
1670
+ return (test_name, result)
1671
+
1672
+ # Use ThreadPoolExecutor to process tests in parallel
1673
+ with ThreadPoolExecutor() as executor:
1674
+ test_results = list(executor.map(process_test_wrapper, test_names))
1675
+
1676
+ # Second loop: Process all collected results
1677
+ for test_name, result in test_results:
1678
+ # Handle the results from the extracted function
1679
+ if result['skip_test']:
1680
+ continue
1681
+
1682
+ if result['no_datapoints_baseline']:
1340
1683
  no_datapoints_baseline_list.append(test_name)
1341
1684
  if test_name not in no_datapoints_list:
1342
1685
  no_datapoints_list.append(test_name)
1343
1686
 
1344
- if len(comparison_timeseries) == 0:
1345
- logging.warning(
1346
- f"No datapoints for test={test_name} for comparison timeseries {comparison_timeseries}"
1347
- )
1687
+ if result['no_datapoints_comparison']:
1348
1688
  no_datapoints_comparison_list.append(test_name)
1349
1689
  if test_name not in no_datapoints_list:
1350
1690
  no_datapoints_list.append(test_name)
1351
1691
 
1352
- if len(baseline_timeseries) != 1 and multi_value_baseline is False:
1353
- if verbose:
1354
- logging.warning(
1355
- "Skipping this test given the value of timeseries !=1. Baseline timeseries {}".format(
1356
- len(baseline_timeseries)
1357
- )
1358
- )
1359
- if len(baseline_timeseries) > 1:
1360
- logging.warning(
1361
- "\t\tTime-series: {}".format(", ".join(baseline_timeseries))
1362
- )
1363
- continue
1364
-
1365
- if len(comparison_timeseries) > 1 and multi_value_comparison is False:
1366
- comparison_timeseries = get_only_Totals(comparison_timeseries)
1367
- if len(comparison_timeseries) != 1 and multi_value_comparison is False:
1368
- if verbose:
1369
- logging.warning(
1370
- "Comparison timeseries {}".format(len(comparison_timeseries))
1371
- )
1372
- continue
1692
+ if result['baseline_only']:
1693
+ baseline_only_list.append(test_name)
1373
1694
 
1374
- baseline_v = "N/A"
1375
- comparison_v = "N/A"
1376
- baseline_values = []
1377
- baseline_datapoints = []
1378
- comparison_values = []
1379
- comparison_datapoints = []
1380
- percentage_change = 0.0
1381
- baseline_v_str = "N/A"
1382
- comparison_v_str = "N/A"
1383
- largest_variance = 0
1384
- baseline_pct_change = "N/A"
1385
- comparison_pct_change = "N/A"
1386
-
1387
- note = ""
1388
- try:
1389
- for ts_name_baseline in baseline_timeseries:
1390
- datapoints_inner = rts.ts().revrange(
1391
- ts_name_baseline, from_ts_ms, to_ts_ms
1392
- )
1393
- baseline_datapoints.extend(datapoints_inner)
1394
- (
1395
- baseline_pct_change,
1396
- baseline_v,
1397
- largest_variance,
1398
- ) = get_v_pct_change_and_largest_var(
1399
- baseline_datapoints,
1400
- baseline_pct_change,
1401
- baseline_v,
1402
- baseline_values,
1403
- largest_variance,
1404
- last_n_baseline,
1405
- verbose,
1406
- )
1407
- for ts_name_comparison in comparison_timeseries:
1408
- datapoints_inner = rts.ts().revrange(
1409
- ts_name_comparison, from_ts_ms, to_ts_ms
1410
- )
1411
- comparison_datapoints.extend(datapoints_inner)
1412
-
1413
- (
1414
- comparison_pct_change,
1415
- comparison_v,
1416
- largest_variance,
1417
- ) = get_v_pct_change_and_largest_var(
1418
- comparison_datapoints,
1419
- comparison_pct_change,
1420
- comparison_v,
1421
- comparison_values,
1422
- largest_variance,
1423
- last_n_comparison,
1424
- verbose,
1425
- )
1695
+ if result['comparison_only']:
1696
+ comparison_only_list.append(test_name)
1426
1697
 
1427
- waterline = regressions_percent_lower_limit
1428
- # if regressions_percent_lower_limit < largest_variance:
1429
- # note = "waterline={:.1f}%.".format(largest_variance)
1430
- # waterline = largest_variance
1698
+ if result['unstable']:
1699
+ unstable_list.append([test_name, "n/a"])
1431
1700
 
1432
- except redis.exceptions.ResponseError as e:
1433
- logging.error(
1434
- "Detected a redis.exceptions.ResponseError. {}".format(e.__str__())
1435
- )
1436
- pass
1437
- except ZeroDivisionError as e:
1438
- logging.error("Detected a ZeroDivisionError. {}".format(e.__str__()))
1439
- pass
1440
- unstable = False
1701
+ if result['boxplot_data']:
1702
+ boxplot_data.append(result['boxplot_data'])
1441
1703
 
1442
- if baseline_v != "N/A" and comparison_v == "N/A":
1443
- logging.warning(
1444
- "Baseline contains datapoints but comparison not for test: {test_name}"
1445
- )
1446
- baseline_only_list.append(test_name)
1447
- if comparison_v != "N/A" and baseline_v == "N/A":
1448
- logging.warning(
1449
- "Comparison contains datapoints but baseline not for test: {test_name}"
1450
- )
1451
- comparison_only_list.append(test_name)
1452
- if (
1453
- baseline_v != "N/A"
1454
- and comparison_pct_change != "N/A"
1455
- and comparison_v != "N/A"
1456
- and baseline_pct_change != "N/A"
1457
- ):
1458
- if comparison_pct_change > 10.0 or baseline_pct_change > 10.0:
1459
- note = "UNSTABLE (very high variance)"
1460
- unstable = True
1461
- unstable_list.append([test_name, "n/a"])
1462
-
1463
- baseline_v_str = prepare_value_str(
1464
- baseline_pct_change,
1465
- baseline_v,
1466
- baseline_values,
1467
- simplify_table,
1468
- metric_name,
1469
- )
1470
- comparison_v_str = prepare_value_str(
1471
- comparison_pct_change,
1472
- comparison_v,
1473
- comparison_values,
1474
- simplify_table,
1475
- metric_name,
1476
- )
1704
+ # Handle group and command changes
1705
+ for test_group in result['tested_groups']:
1706
+ if test_group not in group_change:
1707
+ group_change[test_group] = []
1708
+ group_change[test_group].append(result['percentage_change'])
1477
1709
 
1478
- if metric_mode == "higher-better":
1479
- percentage_change = (
1480
- float(comparison_v) / float(baseline_v) - 1
1481
- ) * 100.0
1482
- else:
1483
- # lower-better
1484
- percentage_change = (
1485
- -(float(baseline_v) - float(comparison_v)) / float(baseline_v)
1486
- ) * 100.0
1710
+ for test_command in result['tested_commands']:
1711
+ if test_command not in command_change:
1712
+ command_change[test_command] = []
1713
+ command_change[test_command].append(result['percentage_change'])
1487
1714
 
1488
- # Collect data for box plot
1489
- boxplot_data.append((test_name, percentage_change))
1490
- else:
1491
- logging.warn(
1492
- f"Missing data for test {test_name}. baseline_v={baseline_v} (pct_change={baseline_pct_change}), comparison_v={comparison_v} (pct_change={comparison_pct_change}) "
1493
- )
1494
- if baseline_v != "N/A" or comparison_v != "N/A":
1495
- detected_regression = False
1496
- detected_improvement = False
1497
-
1498
- # For higher-better metrics: negative change = regression, positive change = improvement
1499
- # For lower-better metrics: positive change = regression, negative change = improvement
1500
- if metric_mode == "higher-better":
1501
- # Higher is better: negative change is bad (regression), positive change is good (improvement)
1502
- if percentage_change < 0.0:
1503
- if -waterline >= percentage_change:
1504
- detected_regression = True
1505
- total_regressions = total_regressions + 1
1506
- note = note + f" {regression_str}"
1507
- detected_regressions.append(test_name)
1508
- elif percentage_change < -noise_waterline:
1509
- if simplify_table is False:
1510
- note = note + f" potential {regression_str}"
1511
- else:
1512
- if simplify_table is False:
1513
- note = note + " No Change"
1514
-
1515
- if percentage_change > 0.0:
1516
- if percentage_change > waterline:
1517
- detected_improvement = True
1518
- total_improvements = total_improvements + 1
1519
- note = note + f" {improvement_str}"
1520
- elif percentage_change > noise_waterline:
1521
- if simplify_table is False:
1522
- note = note + f" potential {improvement_str}"
1523
- else:
1524
- if simplify_table is False:
1525
- note = note + " No Change"
1526
- else:
1527
- # Lower is better: positive change is bad (regression), negative change is good (improvement)
1528
- if percentage_change > 0.0:
1529
- if percentage_change >= waterline:
1530
- detected_regression = True
1531
- total_regressions = total_regressions + 1
1532
- note = note + f" {regression_str}"
1533
- detected_regressions.append(test_name)
1534
- elif percentage_change > noise_waterline:
1535
- if simplify_table is False:
1536
- note = note + f" potential {regression_str}"
1537
- else:
1538
- if simplify_table is False:
1539
- note = note + " No Change"
1540
-
1541
- if percentage_change < 0.0:
1542
- if -percentage_change > waterline:
1543
- detected_improvement = True
1544
- total_improvements = total_improvements + 1
1545
- note = note + f" {improvement_str}"
1546
- elif -percentage_change > noise_waterline:
1547
- if simplify_table is False:
1548
- note = note + f" potential {improvement_str}"
1549
- else:
1550
- if simplify_table is False:
1551
- note = note + " No Change"
1552
-
1553
- for test_group in tested_groups:
1554
- if test_group not in group_change:
1555
- group_change[test_group] = []
1556
- group_change[test_group].append(percentage_change)
1557
-
1558
- for test_command in tested_commands:
1559
- if test_command not in command_change:
1560
- command_change[test_command] = []
1561
- command_change[test_command].append(percentage_change)
1562
-
1563
- if (
1564
- detected_improvement is False
1565
- and detected_regression is False
1566
- and not unstable
1567
- ):
1568
- total_stable = total_stable + 1
1715
+ # Handle regression/improvement detection and table updates
1716
+ if result['line'] is not None:
1717
+ detected_regression = result['detected_regression']
1718
+ detected_improvement = result['detected_improvement']
1719
+ unstable = result['unstable']
1720
+ line = result['line']
1721
+ percentage_change = result['percentage_change']
1569
1722
 
1570
- if unstable:
1571
- total_unstable += 1
1572
-
1573
- should_add_line = False
1574
- line = get_line(
1575
- baseline_v_str,
1576
- comparison_v_str,
1577
- note,
1578
- percentage_change,
1579
- test_link,
1580
- )
1581
1723
  if detected_regression:
1724
+ total_regressions = total_regressions + 1
1725
+ detected_regressions.append(test_name)
1582
1726
  regressions_list.append([test_name, percentage_change])
1583
1727
  table_regressions.append(line)
1584
1728
 
1585
1729
  if detected_improvement:
1730
+ total_improvements = total_improvements + 1
1586
1731
  improvements_list.append([test_name, percentage_change])
1587
1732
  table_improvements.append(line)
1588
1733
 
1589
1734
  if unstable:
1735
+ total_unstable += 1
1590
1736
  table_unstable.append(line)
1591
1737
  else:
1592
1738
  if not detected_regression and not detected_improvement:
1739
+ total_stable = total_stable + 1
1593
1740
  table_stable.append(line)
1594
1741
 
1742
+ should_add_line = False
1595
1743
  if print_regressions_only and detected_regression:
1596
1744
  should_add_line = True
1597
1745
  if print_improvements_only and detected_improvement:
@@ -1604,10 +1752,7 @@ def from_rts_to_regression_table(
1604
1752
  if should_add_line:
1605
1753
  total_comparison_points = total_comparison_points + 1
1606
1754
  table_full.append(line)
1607
- else:
1608
- logging.warning(
1609
- "There were no datapoints both for baseline and comparison for test: {test_name}"
1610
- )
1755
+ elif result['no_datapoints_both']:
1611
1756
  if test_name not in no_datapoints_list:
1612
1757
  no_datapoints_list.append(test_name)
1613
1758
  logging.warning(
@@ -1736,19 +1881,33 @@ def prepare_value_str(
1736
1881
  return baseline_v_str
1737
1882
 
1738
1883
 
1884
def filter_test_names_by_regex(test_names, tags_regex_string):
    """
    Filter test names based on regex pattern.

    Names that are not ``str`` (for example ``bytes`` members returned by the
    database) are decoded before matching, and the decoded form is what is
    returned.

    Args:
        test_names: Iterable of test names to filter
        tags_regex_string: Regex pattern (string or compiled pattern) searched
            for anywhere in each test name

    Returns:
        List of filtered test names, in input order, that match the regex pattern

    Raises:
        re.error: If ``tags_regex_string`` is not a valid regular expression.
    """
    # Compile once rather than resolving the pattern again for every name.
    pattern = re.compile(tags_regex_string)
    decoded_names = (
        name if isinstance(name, str) else name.decode() for name in test_names
    )
    return [name for name in decoded_names if pattern.search(name) is not None]
1903
+
1904
+
1739
1905
  def get_test_names_from_db(rts, tags_regex_string, test_names, used_key):
1740
1906
  try:
1741
1907
  test_names = rts.smembers(used_key)
1742
1908
  test_names = list(test_names)
1743
1909
  test_names.sort()
1744
- final_test_names = []
1745
- for test_name in test_names:
1746
- if not isinstance(test_name, str):
1747
- test_name = test_name.decode()
1748
- match_obj = re.search(tags_regex_string, test_name)
1749
- if match_obj is not None:
1750
- final_test_names.append(test_name)
1751
- test_names = final_test_names
1910
+ test_names = filter_test_names_by_regex(test_names, tags_regex_string)
1752
1911
 
1753
1912
  except redis.exceptions.ResponseError as e:
1754
1913
  logging.warning(
@@ -2281,14 +2281,14 @@ def filter_test_files(
2281
2281
  continue
2282
2282
 
2283
2283
  if tests_regexp != ".*":
2284
- logging.info(
2284
+ logging.debug(
2285
2285
  "Filtering all tests via a regular expression: {}".format(tests_regexp)
2286
2286
  )
2287
2287
  tags_regex_string = re.compile(tests_regexp)
2288
2288
 
2289
2289
  match_obj = re.search(tags_regex_string, test_file)
2290
2290
  if match_obj is None:
2291
- logging.info(
2291
+ logging.debug(
2292
2292
  "Skipping {} given it does not match regex {}".format(
2293
2293
  test_file, tests_regexp
2294
2294
  )
@@ -1,7 +1,8 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: redis-benchmarks-specification
3
- Version: 0.2.29
3
+ Version: 0.2.30
4
4
  Summary: The Redis benchmarks specification describes the cross-language/tools requirements and expectations to foster performance and observability standards around redis related technologies. Members from both industry and academia, including organizations and individuals are encouraged to contribute.
5
+ License-File: LICENSE
5
6
  Author: filipecosta90
6
7
  Author-email: filipecosta.90@gmail.com
7
8
  Requires-Python: >=3.10.0,<4.0.0
@@ -9,6 +10,8 @@ Classifier: Programming Language :: Python :: 3
9
10
  Classifier: Programming Language :: Python :: 3.10
10
11
  Classifier: Programming Language :: Python :: 3.11
11
12
  Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Programming Language :: Python :: 3.13
14
+ Classifier: Programming Language :: Python :: 3.14
12
15
  Requires-Dist: Flask (>=2.0.3,<3.0.0)
13
16
  Requires-Dist: Flask-HTTPAuth (>=4.4.0,<5.0.0)
14
17
  Requires-Dist: GitPython (>=3.1.20,<4.0.0)
@@ -21,7 +21,7 @@ redis_benchmarks_specification/__common__/suppress_warnings.py,sha256=xpOjJ_piGY
21
21
  redis_benchmarks_specification/__common__/timeseries.py,sha256=kHpkpNwZgWpjCh_Fg0wFcxNRMTb5SoSNwd_UHUCNVhc,54283
22
22
  redis_benchmarks_specification/__compare__/__init__.py,sha256=DtBXRp0Q01XgCFmY-1OIePMyyYihVNAjZ1Y8zwqSDN0,101
23
23
  redis_benchmarks_specification/__compare__/args.py,sha256=CNtA7pI9CJDTBJPGL2pNVfis7VDdxLautwRyka7oUCI,8911
24
- redis_benchmarks_specification/__compare__/compare.py,sha256=_AbuV3FZxtUZIdq4qq24LNzPNIdtQQaqrk8bUjn9blk,84327
24
+ redis_benchmarks_specification/__compare__/compare.py,sha256=7fawmGqLCz5buCqfKRkM-wgLUUxNkFOITxhpDKrVdos,87802
25
25
  redis_benchmarks_specification/__init__.py,sha256=YQIEx2sLPPA0JR9OuCuMNMNtm-f_gqDKgzvNJnkGNKY,491
26
26
  redis_benchmarks_specification/__runner__/__init__.py,sha256=l-G1z-t6twUgi8QLueqoTQLvJmv3hJoEYskGm6H7L6M,83
27
27
  redis_benchmarks_specification/__runner__/args.py,sha256=K3VGmBC0-9lSv9H6VDp0N-6FGMWvc_4H0pG_TOXN5u8,11312
@@ -37,7 +37,7 @@ redis_benchmarks_specification/__self_contained_coordinator__/docker.py,sha256=0
37
37
  redis_benchmarks_specification/__self_contained_coordinator__/post_processing.py,sha256=sVLKNnWdAqYY9DjVdqRC5tDaIrVSaI3Ca7w8-DQ-LRM,776
38
38
  redis_benchmarks_specification/__self_contained_coordinator__/prepopulation.py,sha256=1UeFr2T1ZQBcHCSd4W1ZtaWgXyFPfjLyDi_DgDc1eTA,2957
39
39
  redis_benchmarks_specification/__self_contained_coordinator__/runners.py,sha256=IESJoOgbLJxhwf27f0UIUrOxTLuXQAQkz8-LH0CAhAw,33606
40
- redis_benchmarks_specification/__self_contained_coordinator__/self_contained_coordinator.py,sha256=CY9Ame9I2zwOyOfZTlQst7sxiCI8EWVSOjIGxxpRQIc,113591
40
+ redis_benchmarks_specification/__self_contained_coordinator__/self_contained_coordinator.py,sha256=uAVn-XXEHaWjZyotXq1kLzcvPrdjeYQwkmZyQyvcd6c,113593
41
41
  redis_benchmarks_specification/__setups__/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
42
  redis_benchmarks_specification/__setups__/topologies.py,sha256=xQ1IJkcTji_ZjLiJd3vOxZpvbNtBLZw9cPkw5hGJKHU,481
43
43
  redis_benchmarks_specification/__spec__/__init__.py,sha256=l-G1z-t6twUgi8QLueqoTQLvJmv3hJoEYskGm6H7L6M,83
@@ -296,8 +296,8 @@ redis_benchmarks_specification/test-suites/memtier_benchmark-stream-10M-entries-
296
296
  redis_benchmarks_specification/test-suites/memtier_benchmark-stream-concurrent-xadd-xreadgroup-70-30.yml,sha256=M707Z-uJ-xDiNfkcIjtJWZecSRepOQDbTy-CUStsbqM,1964
297
297
  redis_benchmarks_specification/test-suites/template.txt,sha256=ezqGiRPOvuSDO0iG7GEf-AGXNfHbgXI89_G0RUEzL88,481
298
298
  redis_benchmarks_specification/vector-search-test-suites/vector_db_benchmark_test.yml,sha256=PD7ow-k4Ll2BkhEC3aIqiaCZt8Hc4aJIp96Lw3J3mcI,791
299
- redis_benchmarks_specification-0.2.29.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
300
- redis_benchmarks_specification-0.2.29.dist-info/METADATA,sha256=bNTOqUlfR-bui3NzUeJWdRZBhcrhS8qQRfcrx2ZpENQ,22767
301
- redis_benchmarks_specification-0.2.29.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
302
- redis_benchmarks_specification-0.2.29.dist-info/entry_points.txt,sha256=x5WBXCZsnDRTZxV7SBGmC65L2k-ygdDOxV8vuKN00Nk,715
303
- redis_benchmarks_specification-0.2.29.dist-info/RECORD,,
299
+ redis_benchmarks_specification-0.2.30.dist-info/METADATA,sha256=7Xcyp-HDZ53YksM2weD17CpH-VieaOir3BFvh58vqqc,22891
300
+ redis_benchmarks_specification-0.2.30.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
301
+ redis_benchmarks_specification-0.2.30.dist-info/entry_points.txt,sha256=x5WBXCZsnDRTZxV7SBGmC65L2k-ygdDOxV8vuKN00Nk,715
302
+ redis_benchmarks_specification-0.2.30.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
303
+ redis_benchmarks_specification-0.2.30.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 1.9.0
2
+ Generator: poetry-core 2.2.1
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any