npm - @pennyfarthing/core - Versions diffs - 7.6.1 → 7.7.0 - Mend

@pennyfarthing/core 7.6.1 → 7.7.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (68)

package/README.md +109 -201
package/package.json +1 -1
package/packages/core/dist/cli/commands/doctor.d.ts.map +1 -1
package/packages/core/dist/cli/commands/doctor.js +91 -0
package/packages/core/dist/cli/commands/doctor.js.map +1 -1
package/packages/core/dist/cli/commands/init.js +31 -0
package/packages/core/dist/cli/commands/init.js.map +1 -1
package/packages/core/dist/cli/commands/update.js +31 -0
package/packages/core/dist/cli/commands/update.js.map +1 -1
package/pennyfarthing-dist/agents/architect.md +48 -53
package/pennyfarthing-dist/agents/dev.md +74 -164
package/pennyfarthing-dist/agents/devops.md +44 -39
package/pennyfarthing-dist/agents/handoff.md +46 -23
package/pennyfarthing-dist/agents/orchestrator.md +84 -255
package/pennyfarthing-dist/agents/pm.md +40 -50
package/pennyfarthing-dist/agents/reviewer-preflight.md +58 -26
package/pennyfarthing-dist/agents/reviewer.md +107 -298
package/pennyfarthing-dist/agents/sm-file-summary.md +51 -30
package/pennyfarthing-dist/agents/sm-finish.md +59 -38
package/pennyfarthing-dist/agents/sm-handoff.md +40 -33
package/pennyfarthing-dist/agents/sm-setup.md +89 -47
package/pennyfarthing-dist/agents/sm.md +171 -558
package/pennyfarthing-dist/agents/tea.md +77 -146
package/pennyfarthing-dist/agents/tech-writer.md +43 -24
package/pennyfarthing-dist/agents/testing-runner.md +73 -30
package/pennyfarthing-dist/agents/ux-designer.md +39 -25
package/pennyfarthing-dist/agents/workflow-status-check.md +34 -16
package/pennyfarthing-dist/commands/benchmark.md +19 -1
package/pennyfarthing-dist/commands/continue-session.md +1 -1
package/pennyfarthing-dist/commands/solo.md +5 -0
package/pennyfarthing-dist/commands/theme-maker.md +5 -5
package/pennyfarthing-dist/commands/work.md +1 -1
package/pennyfarthing-dist/guides/XML-TAGS.md +179 -0
package/pennyfarthing-dist/guides/agent-behavior.md +22 -9
package/pennyfarthing-dist/guides/agent-tag-taxonomy.md +432 -0
package/pennyfarthing-dist/guides/patterns/approval-gates-pattern.md +27 -7
package/pennyfarthing-dist/guides/scale-levels.md +114 -0
package/pennyfarthing-dist/personas/themes/gilligans-island.yaml +2 -2
package/pennyfarthing-dist/personas/themes/star-trek-tos.yaml +1 -1
package/pennyfarthing-dist/scripts/core/agent-session.sh +13 -7
package/pennyfarthing-dist/scripts/core/check-context.sh +6 -1
package/pennyfarthing-dist/scripts/core/prime.sh +57 -32
package/pennyfarthing-dist/scripts/git/create-feature-branches.sh +45 -4
package/pennyfarthing-dist/scripts/git/git-status-all.sh +32 -7
package/pennyfarthing-dist/scripts/hooks/bell-mode-hook.sh +30 -11
package/pennyfarthing-dist/scripts/hooks/pre-commit.sh +80 -23
package/pennyfarthing-dist/scripts/hooks/question-reflector-check.mjs +66 -53
package/pennyfarthing-dist/scripts/hooks/question-reflector-check.sh +4 -4
package/pennyfarthing-dist/scripts/hooks/question_reflector_check.py +402 -0
package/pennyfarthing-dist/scripts/hooks/session-stop.sh +7 -0
package/pennyfarthing-dist/scripts/hooks/welcome-hook.sh +94 -0
package/pennyfarthing-dist/scripts/jira/jira-claim-story.sh +10 -152
package/pennyfarthing-dist/scripts/jira/jira-sync-story.sh +14 -4
package/pennyfarthing-dist/scripts/jira/jira-sync.sh +12 -4
package/pennyfarthing-dist/scripts/jira/sync-epic-jira.sh +11 -99
package/pennyfarthing-dist/scripts/lib/common.sh +55 -0
package/pennyfarthing-dist/scripts/maintenance/sidecar-health.sh +97 -0
package/pennyfarthing-dist/scripts/misc/statusline.sh +27 -22
package/pennyfarthing-dist/scripts/story/create-story.sh +14 -154
package/pennyfarthing-dist/scripts/story/size-story.sh +12 -192
package/pennyfarthing-dist/scripts/story/story-template.sh +12 -156
package/pennyfarthing-dist/scripts/test/ground-truth-judge.py +24 -93
package/pennyfarthing-dist/scripts/test/swebench-judge.py +33 -59
package/pennyfarthing-dist/scripts/validation/validate-agent-schema.sh +575 -0
package/pennyfarthing-dist/scripts/workflow/check.py +502 -0
package/pennyfarthing-dist/skills/skill-registry.yaml +52 -16
package/pennyfarthing-dist/skills/sprint/skill.md +1 -1
package/pennyfarthing-dist/templates/settings.local.json.template +11 -0

package/pennyfarthing-dist/scripts/test/ground-truth-judge.py CHANGED Viewed

@@ -16,71 +16,21 @@ import sys
 from pathlib import Path
 from difflib import SequenceMatcher
-def load_swebench_data(cache_path="/tmp/swebench_all.json"):
-    """Load SWE-bench data from cache."""
-    with open(cache_path, 'r') as f:
-        return json.load(f)
-def find_scenario(data, scenario_name):
-    """Find scenario in SWE-bench data by name."""
-    # Normalize name (flask-5014 -> pallets__flask-5014)
-    for item in data:
-        instance_id = item.get('instance_id', '')
-        # Try various matching strategies
-        if scenario_name in instance_id.replace('__', '-'):
-            return item
-        if scenario_name.replace('-', '__') in instance_id:
-            return item
-    return None
-def extract_patch_elements(patch_text):
-    """Extract key elements from a patch."""
-    elements = {
-        'files': [],
-        'functions': [],
-        'additions': [],
-        'deletions': [],
-        'key_patterns': []
-    }
+# Add parent to path for pennyfarthing_scripts imports
+sys.path.insert(0, str(Path(__file__).resolve().parents[3]))
+from pennyfarthing_scripts.swebench import (
+    extract_patch_info,
+    extract_problem_keywords,
+    find_scenario,
+    get_meaningful_patterns,
+    load_swebench_data,
+)
-    current_file = None
-    for line in patch_text.split('\n'):
-        # File changes
-        if line.startswith('diff --git'):
-            match = re.search(r'b/(.+)$', line)
-            if match:
-                current_file = match.group(1)
-                elements['files'].append(current_file)
-        # Function/class context
-        if line.startswith('@@'):
-            match = re.search(r'@@.*@@\s*(.+)$', line)
-            if match:
-                elements['functions'].append(match.group(1).strip())
-        # Additions
-        if line.startswith('+') and not line.startswith('+++'):
-            clean_line = line[1:].strip()
-            if clean_line and not clean_line.startswith('#'):
-                elements['additions'].append(clean_line)
-                # Extract key patterns (function calls, variable names, etc.)
-                patterns = re.findall(r'\b\w+\b', clean_line)
-                elements['key_patterns'].extend(patterns)
-        # Deletions
-        if line.startswith('-') and not line.startswith('---'):
-            clean_line = line[1:].strip()
-            if clean_line and not clean_line.startswith('#'):
-                elements['deletions'].append(clean_line)
-    # Deduplicate
-    elements['key_patterns'] = list(set(elements['key_patterns']))
-    return elements
 def score_response(response_text, ground_truth):
     """Score a response against ground truth patch."""
-    gt_elements = extract_patch_elements(ground_truth['patch'])
+    patch_info = extract_patch_info(ground_truth['patch'])
     scores = {
         'file_identification': 0,
@@ -94,16 +44,16 @@ def score_response(response_text, ground_truth):
     # 1. FILE IDENTIFICATION (20 points)
     files_found = 0
-    for f in gt_elements['files']:
+    for f in patch_info.files:
         # Check various forms of the filename
         filename = Path(f).name
         if filename.lower() in response_lower or f.lower() in response_lower:
             files_found += 1
-    if gt_elements['files']:
-        file_score = (files_found / len(gt_elements['files'])) * 20
+    if patch_info.files:
+        file_score = (files_found / len(patch_info.files)) * 20
         scores['file_identification'] = min(20, file_score)
-        scores['details']['files_expected'] = gt_elements['files']
+        scores['details']['files_expected'] = patch_info.files
         scores['details']['files_found'] = files_found
     else:
         scores['file_identification'] = 20  # No specific file in patch
@@ -111,7 +61,7 @@ def score_response(response_text, ground_truth):
     # 2. LOCATION IDENTIFICATION (20 points)
     # Look for function/class names mentioned in the patch
     locations_found = 0
-    for func in gt_elements['functions']:
+    for func in patch_info.functions:
         # Extract the function/class name
         func_match = re.search(r'(def|class)\s+(\w+)', func)
         if func_match:
@@ -121,20 +71,17 @@ def score_response(response_text, ground_truth):
         elif func.strip() and func.strip().split()[0] in response_lower:
             locations_found += 1
-    if gt_elements['functions']:
-        loc_score = (locations_found / len(gt_elements['functions'])) * 20
+    if patch_info.functions:
+        loc_score = (locations_found / len(patch_info.functions)) * 20
         scores['location_identification'] = min(20, loc_score)
-        scores['details']['locations_expected'] = gt_elements['functions'][:3]
+        scores['details']['locations_expected'] = patch_info.functions[:3]
         scores['details']['locations_found'] = locations_found
     else:
         scores['location_identification'] = 10  # Partial credit
     # 3. FIX LOGIC MATCH (40 points)
     # Check if key code patterns from the fix appear in the response
-    key_patterns = gt_elements['key_patterns']
-    # Filter to meaningful patterns (not common words)
-    common_words = {'if', 'else', 'return', 'self', 'def', 'class', 'for', 'in', 'not', 'and', 'or', 'is', 'none', 'true', 'false'}
-    meaningful_patterns = [p for p in key_patterns if p.lower() not in common_words and len(p) > 2]
+    meaningful_patterns = get_meaningful_patterns(patch_info.key_patterns)
     patterns_found = 0
     for pattern in meaningful_patterns:
@@ -150,7 +97,7 @@ def score_response(response_text, ground_truth):
     # Check for actual code additions
     additions_matched = 0
-    for addition in gt_elements['additions'][:5]:  # Check first 5 additions
+    for addition in patch_info.additions[:5]:  # Check first 5 additions
         # Normalize and check
         addition_normalized = re.sub(r'\s+', ' ', addition.lower())
         response_normalized = re.sub(r'\s+', ' ', response_lower)
@@ -160,8 +107,8 @@ def score_response(response_text, ground_truth):
         if similarity > 0.6 or addition_normalized in response_normalized:
             additions_matched += 1
-    if gt_elements['additions']:
-        addition_score = (additions_matched / min(5, len(gt_elements['additions']))) * 20
+    if patch_info.additions:
+        addition_score = (additions_matched / min(5, len(patch_info.additions))) * 20
         scores['details']['additions_matched'] = additions_matched
     else:
         addition_score = 10
@@ -205,23 +152,6 @@ def score_response(response_text, ground_truth):
     return scores
-def extract_problem_keywords(problem_statement):
-    """Extract key technical terms from problem statement."""
-    if not problem_statement:
-        return []
-    # Find quoted strings, function names, error messages
-    keywords = []
-    # Find quoted terms
-    quoted = re.findall(r'[`\'"]([^`\'"]+)[`\'"]', problem_statement)
-    keywords.extend(quoted)
-    # Find CamelCase or snake_case identifiers
-    identifiers = re.findall(r'\b[A-Z][a-z]+[A-Z]\w*\b|\b\w+_\w+\b', problem_statement)
-    keywords.extend(identifiers)
-    return list(set(keywords))[:10]
 def main():
     if len(sys.argv) < 3:
@@ -285,5 +215,6 @@ def main():
     return scores
 if __name__ == '__main__':
     main()

package/pennyfarthing-dist/scripts/test/swebench-judge.py CHANGED Viewed

@@ -17,52 +17,15 @@ import sys
 from pathlib import Path
 from difflib import SequenceMatcher
-def load_swebench_data(cache_path="/tmp/swebench_all.json"):
-    """Load SWE-bench ground truth data."""
-    with open(cache_path, 'r') as f:
-        return json.load(f)
-def find_ground_truth(data, scenario_name):
-    """Find scenario in SWE-bench data."""
-    for item in data:
-        instance_id = item.get('instance_id', '')
-        if scenario_name in instance_id.replace('__', '-'):
-            return item
-    return None
-def extract_patch_info(patch_text):
-    """Extract structured info from patch."""
-    info = {
-        'files': [],
-        'functions': [],
-        'additions': [],
-        'deletions': [],
-        'key_code': []
-    }
-    for line in patch_text.split('\n'):
-        if line.startswith('diff --git'):
-            match = re.search(r'b/(.+)$', line)
-            if match:
-                info['files'].append(match.group(1))
-        if line.startswith('@@'):
-            match = re.search(r'@@.*@@\s*(.+)$', line)
-            if match:
-                info['functions'].append(match.group(1).strip())
-        if line.startswith('+') and not line.startswith('+++'):
-            clean = line[1:].strip()
-            if clean and not clean.startswith('#'):
-                info['additions'].append(clean)
-                info['key_code'].append(clean)
+# Add parent to path for pennyfarthing_scripts imports
+sys.path.insert(0, str(Path(__file__).resolve().parents[3]))
-        if line.startswith('-') and not line.startswith('---'):
-            clean = line[1:].strip()
-            if clean and not clean.startswith('#'):
-                info['deletions'].append(clean)
+from pennyfarthing_scripts.swebench import (
+    extract_patch_info,
+    find_scenario,
+    load_swebench_data,
+)
-    return info
 def score_identifies_bug_location(response, ground_truth):
     """Score IDENTIFIES_BUG_LOCATION (15 pts) using ground truth."""
@@ -74,34 +37,35 @@ def score_identifies_bug_location(response, ground_truth):
     # Check files (7.5 pts)
     files_found = 0
-    for f in patch_info['files']:
+    for f in patch_info.files:
         filename = Path(f).name.lower()
         if filename in response_lower or f.lower() in response_lower:
             files_found += 1
-    if patch_info['files']:
-        file_score = (files_found / len(patch_info['files'])) * 7.5
+    if patch_info.files:
+        file_score = (files_found / len(patch_info.files)) * 7.5
         score += file_score
-        details.append(f"Files: {files_found}/{len(patch_info['files'])} found")
+        details.append(f"Files: {files_found}/{len(patch_info.files)} found")
     # Check functions/classes (7.5 pts)
     funcs_found = 0
-    for func in patch_info['functions']:
+    for func in patch_info.functions:
         func_match = re.search(r'(def|class)\s+(\w+)', func)
         if func_match:
             func_name = func_match.group(2).lower()
             if func_name in response_lower:
                 funcs_found += 1
-    if patch_info['functions']:
-        func_score = min(7.5, (funcs_found / len(patch_info['functions'])) * 7.5)
+    if patch_info.functions:
+        func_score = min(7.5, (funcs_found / len(patch_info.functions)) * 7.5)
         score += func_score
-        details.append(f"Functions: {funcs_found}/{len(patch_info['functions'])} found")
+        details.append(f"Functions: {funcs_found}/{len(patch_info.functions)} found")
     else:
         score += 3.75  # Partial credit if no specific function in patch
     return min(15, score), details
 def score_explains_why_broken(response, ground_truth):
     """Score EXPLAINS_WHY_BROKEN (15 pts)."""
     response_lower = response.lower()
@@ -133,6 +97,7 @@ def score_explains_why_broken(response, ground_truth):
     return min(15, score), details
 def score_fix_addresses_issue(response, ground_truth):
     """Score FIX_ADDRESSES_ISSUE (20 pts) using ground truth patch."""
     patch_info = extract_patch_info(ground_truth.get('patch', ''))
@@ -143,7 +108,7 @@ def score_fix_addresses_issue(response, ground_truth):
     # Check if key additions from patch appear in response
     additions_matched = 0
-    for addition in patch_info['additions'][:5]:
+    for addition in patch_info.additions[:5]:
         # Normalize whitespace
         addition_norm = re.sub(r'\s+', ' ', addition.lower())
         response_norm = re.sub(r'\s+', ' ', response_lower)
@@ -157,10 +122,10 @@ def score_fix_addresses_issue(response, ground_truth):
             if sim > 0.7:
                 additions_matched += 0.5
-    if patch_info['additions']:
-        addition_score = (additions_matched / min(5, len(patch_info['additions']))) * 15
+    if patch_info.additions:
+        addition_score = (additions_matched / min(5, len(patch_info.additions))) * 15
         score += addition_score
-        details.append(f"Code matches: {additions_matched}/{min(5, len(patch_info['additions']))}")
+        details.append(f"Code matches: {additions_matched}/{min(5, len(patch_info.additions))}")
     # Check for code block with fix
     if '```' in response:
@@ -169,6 +134,7 @@ def score_fix_addresses_issue(response, ground_truth):
     return min(20, score), details
 def score_fix_is_minimal(response, ground_truth):
     """Score FIX_IS_MINIMAL (10 pts)."""
     patch_info = extract_patch_info(ground_truth.get('patch', ''))
@@ -177,7 +143,7 @@ def score_fix_is_minimal(response, ground_truth):
     details = []
     # Count lines in patch vs lines in response code blocks
-    patch_lines = len(patch_info['additions']) + len(patch_info['deletions'])
+    patch_lines = len(patch_info.additions) + len(patch_info.deletions)
     # Extract code blocks from response
     code_blocks = re.findall(r'```[\w]*\n(.*?)```', response, re.DOTALL)
@@ -200,6 +166,7 @@ def score_fix_is_minimal(response, ground_truth):
     return min(10, score), details
 def score_fix_syntax_correct(response):
     """Score FIX_SYNTAX_CORRECT (10 pts)."""
     score = 0
@@ -232,6 +199,7 @@ def score_fix_syntax_correct(response):
     return min(10, score), details
 def score_edge_cases(response):
     """Score EDGE_CASES (10 pts)."""
     response_lower = response.lower()
@@ -247,6 +215,7 @@ def score_edge_cases(response):
     return score, details
 def score_test_coverage(response):
     """Score TEST_COVERAGE (10 pts)."""
     response_lower = response.lower()
@@ -271,6 +240,7 @@ def score_test_coverage(response):
     return min(10, score), details
 def score_in_character(response, persona="senior developer"):
     """Score IN_CHARACTER (10 pts)."""
     response_lower = response.lower()
@@ -287,9 +257,10 @@ def score_in_character(response, persona="senior developer"):
     return score, details
 def judge_response(scenario_name, response_text, swebench_data):
     """Full judgment using scenario rubric + ground truth."""
-    ground_truth = find_ground_truth(swebench_data, scenario_name)
+    ground_truth = find_scenario(swebench_data, scenario_name)
     if not ground_truth:
         return {'error': f'Scenario {scenario_name} not found in SWE-bench data'}
@@ -345,15 +316,17 @@ def judge_response(scenario_name, response_text, swebench_data):
         scores['persona']['subtotal']
     )
+    patch_info = extract_patch_info(ground_truth.get('patch', ''))
     return {
         'scenario': scenario_name,
         'instance_id': ground_truth.get('instance_id'),
         'scores': scores,
         'total': round(total, 1),
         'details': all_details,
-        'ground_truth_files': extract_patch_info(ground_truth.get('patch', ''))['files']
+        'ground_truth_files': patch_info.files
     }
 def main():
     if len(sys.argv) < 3:
         print("Usage: swebench-judge.py <scenario_name> <response_file>")
@@ -396,5 +369,6 @@ def main():
         json.dump(result, f, indent=2)
     print(f"\nSaved to: {output_path}")
 if __name__ == '__main__':
     main()