npm - harness-evolver - Versions diffs - 4.2.1 → 4.2.2 - Mend

harness-evolver 4.2.1 → 4.2.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (7)

package/.claude-plugin/plugin.json +1 -1
package/package.json +1 -1
package/tools/adversarial_inject.py +1 -1
package/tools/dataset_health.py +2 -2
package/tools/read_results.py +1 -1
package/tools/regression_tracker.py +1 -1
package/tools/trace_insights.py +1 -1

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "harness-evolver",
   "description": "LangSmith-native autonomous agent optimization — evolves LLM agent code using multi-agent proposers, LangSmith experiments, and git worktrees",
-  "version": "4.2.1",
+  "version": "4.2.2",
   "author": {
     "name": "Raphael Valdetaro"
   },

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "harness-evolver",
-  "version": "4.2.1",
+  "version": "4.2.2",
   "description": "LangSmith-native autonomous agent optimization for Claude Code",
   "author": "Raphael Valdetaro",
   "license": "MIT",

package/tools/adversarial_inject.py CHANGED Viewed

@@ -59,7 +59,7 @@ def detect_memorization(client, experiment_name, dataset_name):
     """Check if agent outputs are suspiciously similar to reference outputs."""
     suspicious = []
     try:
-        runs = list(client.list_runs(project_name=experiment_name, is_root=True, limit=200))
+        runs = list(client.list_runs(project_name=experiment_name, is_root=True, limit=100))
         examples = {str(e.id): e for e in client.list_examples(dataset_name=dataset_name, limit=500)}
         for run in runs:

package/tools/dataset_health.py CHANGED Viewed

@@ -68,7 +68,7 @@ def check_difficulty(client, config):
         return None
     try:
-        runs = list(client.list_runs(project_name=best_exp, is_root=True, limit=200))
+        runs = list(client.list_runs(project_name=best_exp, is_root=True, limit=100))
         if not runs:
             return None
@@ -129,7 +129,7 @@ def check_dead_examples(client, config):
     for exp_name in recent_exps:
         try:
-            runs = list(client.list_runs(project_name=exp_name, is_root=True, limit=200))
+            runs = list(client.list_runs(project_name=exp_name, is_root=True, limit=100))
             all_run_ids = [run.id for run in runs]
             if not all_run_ids:
                 continue

package/tools/read_results.py CHANGED Viewed

@@ -68,7 +68,7 @@ def read_experiment(client, experiment_name):
         runs = list(client.list_runs(
             project_name=experiment_name,
             is_root=True,
-            limit=200,
+            limit=100,
         ))
         if not runs:

package/tools/regression_tracker.py CHANGED Viewed

@@ -60,7 +60,7 @@ def get_per_example_scores(client, experiment_name):
     """Get per-example scores from an experiment."""
     scores = {}
     try:
-        runs = list(client.list_runs(project_name=experiment_name, is_root=True, limit=200))
+        runs = list(client.list_runs(project_name=experiment_name, is_root=True, limit=100))
         all_run_ids = [run.id for run in runs]
         all_feedbacks = list(client.list_feedback(run_ids=all_run_ids))
         fb_map = {}

package/tools/trace_insights.py CHANGED Viewed

@@ -332,7 +332,7 @@ def fetch_scores_from_experiment(experiment_name):
         runs = list(client.list_runs(
             project_name=experiment_name,
             is_root=True,
-            limit=200,
+            limit=100,
         ))
         all_run_ids = [run.id for run in runs]