npm - create-walle - Versions diffs - 0.9.21 → 0.9.23 - Mend

create-walle 0.9.21 → 0.9.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (500) hide show

package/template/wall-e/eval/benchmarks/coding.json DELETED Viewed

@@ -1,122 +0,0 @@
-[
-  {
-    "id": "coding-001",
-    "prompt": "Write a JavaScript function called `debounce` that takes a function and a delay in milliseconds, and returns a debounced version that delays invocation until after the delay has elapsed since the last call.",
-    "taskType": "coding",
-    "difficulty": "easy",
-    "expectedTraits": ["has code block", "defines function", "uses setTimeout", "uses clearTimeout"],
-    "tags": ["js", "utility"]
-  },
-  {
-    "id": "coding-002",
-    "prompt": "Write a Python function that reads a CSV file and returns a list of dictionaries, where each dictionary maps column headers to row values.",
-    "taskType": "coding",
-    "difficulty": "easy",
-    "expectedTraits": ["has code block", "uses csv module", "returns list", "handles headers"],
-    "tags": ["python", "file-io"]
-  },
-  {
-    "id": "coding-003",
-    "prompt": "Write a JavaScript async function called `fetchWithRetry` that fetches a URL and retries up to 3 times with exponential backoff if the request fails.",
-    "taskType": "coding",
-    "difficulty": "easy",
-    "expectedTraits": ["has code block", "uses async/await", "has retry logic", "has backoff"],
-    "tags": ["js", "async", "networking"]
-  },
-  {
-    "id": "coding-004",
-    "prompt": "Implement a function in JavaScript that finds the longest common subsequence (LCS) of two strings. Return the length of the LCS.",
-    "taskType": "coding",
-    "difficulty": "medium",
-    "expectedTraits": ["has code block", "uses dynamic programming", "handles empty strings", "returns number"],
-    "tags": ["js", "algorithm", "dynamic-programming"]
-  },
-  {
-    "id": "coding-005",
-    "prompt": "Write a Python function that implements a binary search on a sorted list and returns the index of the target element, or -1 if not found.",
-    "taskType": "coding",
-    "difficulty": "medium",
-    "expectedTraits": ["has code block", "uses binary search", "handles not found", "defines function"],
-    "tags": ["python", "algorithm", "search"]
-  },
-  {
-    "id": "coding-006",
-    "prompt": "The following JavaScript code throws 'Cannot read property of undefined' intermittently. Debug and fix it:\n\n```js\nasync function getUserPosts(userId) {\n  const response = await fetch(`/api/users/${userId}`);\n  const user = await response.json();\n  return user.posts.map(p => p.title);\n}\n```",
-    "taskType": "coding",
-    "difficulty": "medium",
-    "expectedTraits": ["identifies null check missing", "adds optional chaining or guard", "checks response status", "has code block"],
-    "tags": ["js", "debugging", "async"]
-  },
-  {
-    "id": "coding-007",
-    "prompt": "Debug this Python code that should flatten a nested list but produces incorrect output:\n\n```python\ndef flatten(lst):\n    result = []\n    for item in lst:\n        if isinstance(item, list):\n            flatten(item)\n        else:\n            result.append(item)\n    return result\n```",
-    "taskType": "coding",
-    "difficulty": "medium",
-    "expectedTraits": ["identifies missing extend/concatenation", "fixes recursive call", "has code block", "explains the bug"],
-    "tags": ["python", "debugging", "recursion"]
-  },
-  {
-    "id": "coding-008",
-    "prompt": "Write a JavaScript implementation of a simple LRU (Least Recently Used) cache with `get(key)` and `put(key, value)` methods and a configurable max capacity.",
-    "taskType": "coding",
-    "difficulty": "medium",
-    "expectedTraits": ["has code block", "uses Map or linked list", "implements eviction", "has get and put methods"],
-    "tags": ["js", "data-structure", "algorithm"]
-  },
-  {
-    "id": "coding-009",
-    "prompt": "Write a Python decorator called `@rate_limit(max_calls, period)` that limits how many times a function can be called within a given time period in seconds. Raise an exception if the limit is exceeded.",
-    "taskType": "coding",
-    "difficulty": "medium",
-    "expectedTraits": ["has code block", "uses decorator pattern", "tracks timestamps", "raises exception"],
-    "tags": ["python", "decorator", "rate-limiting"]
-  },
-  {
-    "id": "coding-010",
-    "prompt": "Implement a JavaScript function that deep-merges two objects. Arrays should be concatenated, nested objects should be recursively merged, and primitive values from the second object should override the first.",
-    "taskType": "coding",
-    "difficulty": "medium",
-    "expectedTraits": ["has code block", "handles arrays", "handles nested objects", "handles primitives"],
-    "tags": ["js", "utility", "recursion"]
-  },
-  {
-    "id": "coding-011",
-    "prompt": "Refactor this Express.js route handler to use proper error handling, input validation, and separation of concerns. It currently does everything inline:\n\n```js\napp.post('/api/orders', async (req, res) => {\n  const { userId, items, address } = req.body;\n  const user = await db.query('SELECT * FROM users WHERE id = ?', [userId]);\n  if (!user) return res.status(404).json({ error: 'User not found' });\n  let total = 0;\n  for (const item of items) {\n    const product = await db.query('SELECT * FROM products WHERE id = ?', [item.productId]);\n    total += product.price * item.quantity;\n  }\n  const order = await db.query('INSERT INTO orders (user_id, total, address) VALUES (?, ?, ?)', [userId, total, address]);\n  res.json({ orderId: order.insertId, total });\n});\n```",
-    "taskType": "coding",
-    "difficulty": "hard",
-    "expectedTraits": ["separates validation", "extracts service layer", "adds try-catch", "has code block", "improves structure"],
-    "tags": ["js", "refactoring", "express", "architecture"]
-  },
-  {
-    "id": "coding-012",
-    "prompt": "Design and implement a simple event emitter in JavaScript that supports `on(event, listener)`, `off(event, listener)`, `once(event, listener)`, and `emit(event, ...args)`. Include proper memory leak prevention.",
-    "taskType": "coding",
-    "difficulty": "hard",
-    "expectedTraits": ["has code block", "implements all four methods", "handles once correctly", "mentions max listeners or cleanup"],
-    "tags": ["js", "design-pattern", "event-driven"]
-  },
-  {
-    "id": "coding-013",
-    "prompt": "Design a rate-limited async task queue in JavaScript that: processes tasks concurrently up to a configurable limit, supports priority levels, and allows cancellation of pending tasks. Provide the implementation with JSDoc comments.",
-    "taskType": "coding",
-    "difficulty": "hard",
-    "expectedTraits": ["has code block", "has concurrency limit", "has priority support", "has cancellation", "has jsdoc"],
-    "tags": ["js", "async", "architecture", "queue"]
-  },
-  {
-    "id": "coding-014",
-    "prompt": "Implement a Python context manager that provides database transaction management with automatic rollback on exceptions, savepoint support for nested transactions, and connection pooling awareness.",
-    "taskType": "coding",
-    "difficulty": "hard",
-    "expectedTraits": ["has code block", "uses context manager", "handles rollback", "supports nesting"],
-    "tags": ["python", "database", "architecture"]
-  },
-  {
-    "id": "coding-015",
-    "prompt": "You have a Node.js application that is leaking memory in production. The heap grows by ~50MB/hour. The app uses Express, processes webhooks, and caches results in a plain object. Walk through a systematic debugging approach and suggest code fixes for common causes.",
-    "taskType": "coding",
-    "difficulty": "hard",
-    "expectedTraits": ["mentions heap snapshots", "identifies unbounded cache", "suggests WeakMap or LRU", "mentions event listener leaks", "has code block"],
-    "tags": ["js", "debugging", "performance", "node"]
-  }
-]

package/template/wall-e/eval/benchmarks/memory-retrieval.json DELETED Viewed

@@ -1,234 +0,0 @@
-[
-  {
-    "id": "session-recall-001",
-    "prompt": "Use Wall-E memory to answer: in the prior Codex parser session, which file was changed and what test command proved the fix?",
-    "taskType": "memory-retrieval",
-    "difficulty": "medium",
-    "expectedTraits": ["searches session memory", "mentions parser.js", "mentions node --test", "cites session id"],
-    "tags": ["session-recall", "coding", "sanitized-real-shape"],
-    "retrieval": {
-      "query": "parser src/parser.js node",
-      "expectedSourceIds": ["codex:sanitized-parser"],
-      "expectedSnippets": ["src/parser.js", "node --test tests/parser.test.js"],
-      "seedMemories": [
-        {
-          "source": "codex-jsonl",
-          "source_id": "codex:sanitized-parser:exchange:1",
-          "memory_type": "coding_session_exchange",
-          "timestamp": "2026-04-28T09:15:00.000Z",
-          "cwd": "/repo/app",
-          "content": "Q: Fix the parser crash when quoted values include commas.\nA: Decision: keep the tokenizer stateful instead of adding a regex split. Files: src/parser.js, tests/parser.test.js. Command: node --test tests/parser.test.js passed.",
-          "metadata": {
-            "sourceId": "codex:sanitized-parser",
-            "cwd": "/repo/app",
-            "gitBranch": "fix/parser-quoted-values",
-            "filesEdited": ["src/parser.js", "tests/parser.test.js"],
-            "commands": ["node --test tests/parser.test.js"]
-          }
-        }
-      ]
-    }
-  },
-  {
-    "id": "session-recall-002",
-    "prompt": "Use Wall-E memory to answer: why did the prior Claude session reject the batch write approach for the queue worker?",
-    "taskType": "memory-retrieval",
-    "difficulty": "medium",
-    "expectedTraits": ["searches session memory", "mentions lock contention", "mentions queue-worker.js", "cites session id"],
-    "tags": ["decision", "failure", "sanitized-real-shape"],
-    "retrieval": {
-      "query": "queue worker lock contention",
-      "expectedSourceIds": ["claude:sanitized-queue"],
-      "expectedSnippets": ["lock contention", "queue-worker.js"],
-      "seedMemories": [
-        {
-          "source": "claude-code-jsonl",
-          "source_id": "claude:sanitized-queue:exchange:4",
-          "memory_type": "coding_session_exchange",
-          "timestamp": "2026-04-27T17:22:00.000Z",
-          "cwd": "/repo/service",
-          "content": "Q: Speed up queue persistence.\nA: Blocker: batch writes increased SQLite lock contention under concurrent workers. Decision: keep single-row retry with jitter. Files: workers/queue-worker.js, tests/queue-worker.test.js. Command: npm test -- queue-worker passed.",
-          "metadata": {
-            "sourceId": "claude:sanitized-queue",
-            "cwd": "/repo/service",
-            "gitBranch": "fix/queue-locking",
-            "filesEdited": ["workers/queue-worker.js", "tests/queue-worker.test.js"],
-            "commands": ["npm test -- queue-worker"]
-          }
-        }
-      ]
-    }
-  },
-  {
-    "id": "session-recall-003",
-    "prompt": "Use Wall-E memory to answer: which browser test covered the transcript blank-space regression?",
-    "taskType": "memory-retrieval",
-    "difficulty": "medium",
-    "expectedTraits": ["searches session memory", "mentions codex-blank-space.spec.js", "mentions blank gap", "cites session id"],
-    "tags": ["ui-regression", "browser-test", "sanitized-real-shape"],
-    "retrieval": {
-      "query": "blank-gap codex-blank-space.spec.js",
-      "expectedSourceIds": ["walle:sanitized-terminal-ui"],
-      "expectedSnippets": ["codex-blank-space.spec.js", "completed-turn blank-gap compaction"],
-      "seedMemories": [
-        {
-          "source": "walle-jsonl",
-          "source_id": "walle:sanitized-terminal-ui:assistant:12",
-          "memory_type": "coding_session_assistant_message",
-          "timestamp": "2026-04-29T11:05:00.000Z",
-          "cwd": "/repo/tools",
-          "content": "Decision: fix completed-turn blank-gap compaction in the Codex terminal renderer. Files: claude-task-manager/public/session-stream.js, claude-task-manager/tests/codex-blank-space.spec.js. Command: npx playwright test claude-task-manager/tests/codex-blank-space.spec.js passed.",
-          "metadata": {
-            "sourceId": "walle:sanitized-terminal-ui",
-            "cwd": "/repo/tools",
-            "gitBranch": "fix/codex-terminal-blank-gap",
-            "filesEdited": ["claude-task-manager/public/session-stream.js", "claude-task-manager/tests/codex-blank-space.spec.js"],
-            "commands": ["npx playwright test claude-task-manager/tests/codex-blank-space.spec.js"]
-          }
-        }
-      ]
-    }
-  },
-  {
-    "id": "session-recall-004",
-    "prompt": "Use Wall-E memory to answer: what was the next step after the model routing quorum session?",
-    "taskType": "memory-retrieval",
-    "difficulty": "hard",
-    "expectedTraits": ["searches diary", "mentions router inputs", "mentions evaluation", "cites diary/session id"],
-    "tags": ["diary", "handoff", "sanitized-real-shape"],
-    "retrieval": {
-      "query": "model routing quorum next step router inputs evaluation diary",
-      "expectedSourceIds": ["diary:walle:sanitized-quorum:stop"],
-      "expectedSnippets": ["feed quorum results into routing", "run trusted evaluation"],
-      "seedMemories": [
-        {
-          "source": "walle-diary",
-          "source_id": "diary:walle:sanitized-quorum:stop",
-          "memory_type": "agent_diary",
-          "timestamp": "2026-04-26T20:30:00.000Z",
-          "cwd": "/repo/tools",
-          "content": "Agent diary for walle session sanitized-quorum (stop)\nSummary: feed quorum results into routing instead of leaving them as a side report.\nChanged files: wall-e/evaluation/coding-quorum.js; wall-e/routing/model-router.js\nDecisions: use reviewer/quorum/security data as router inputs.\nNext steps: run trusted evaluation on coding-agent-real cases.",
-          "metadata": {
-            "sourceId": "diary:walle:sanitized-quorum:stop",
-            "sessionId": "sanitized-quorum",
-            "agent": "walle",
-            "event": "stop",
-            "cwd": "/repo/tools",
-            "changed_files": ["wall-e/evaluation/coding-quorum.js", "wall-e/routing/model-router.js"],
-            "next_steps": ["run trusted evaluation on coding-agent-real cases"]
-          }
-        }
-      ]
-    }
-  },
-  {
-    "id": "session-recall-005",
-    "prompt": "Use Wall-E memory to answer: which source adapter handled Gemini JSONL and what privacy class did it use?",
-    "taskType": "memory-retrieval",
-    "difficulty": "medium",
-    "expectedTraits": ["searches session memory", "mentions gemini-jsonl", "mentions pii_potential", "cites session id"],
-    "tags": ["source-adapter", "privacy", "sanitized-real-shape"],
-    "retrieval": {
-      "query": "Gemini JSONL source adapter privacy class pii_potential",
-      "expectedSourceIds": ["codex:sanitized-source-adapters"],
-      "expectedSnippets": ["gemini-jsonl", "pii_potential"],
-      "seedMemories": [
-        {
-          "source": "codex-jsonl",
-          "source_id": "codex:sanitized-source-adapters:assistant:8",
-          "memory_type": "coding_session_assistant_message",
-          "timestamp": "2026-04-29T14:02:00.000Z",
-          "cwd": "/repo/tools",
-          "content": "Decision: register claude-code-jsonl, codex-jsonl, gemini-jsonl, and walle-jsonl as first-party source adapters. The default privacy class for coding session adapters is pii_potential because local transcripts can include personal context.",
-          "metadata": {
-            "sourceId": "codex:sanitized-source-adapters",
-            "cwd": "/repo/tools",
-            "filesEdited": ["wall-e/sources/gemini-jsonl.js", "wall-e/sources/builtin.js"]
-          }
-        }
-      ]
-    }
-  },
-  {
-    "id": "session-recall-006",
-    "prompt": "Use Wall-E memory to answer honestly: do we have a remembered decision about replacing SQLite with ChromaDB?",
-    "taskType": "memory-retrieval",
-    "difficulty": "hard",
-    "expectedTraits": ["searches session memory", "says do not replace SQLite", "mentions sqlite-vec", "does not hallucinate approval"],
-    "tags": ["negative-recall", "architecture", "sanitized-real-shape"],
-    "retrieval": {
-      "query": "replace SQLite with ChromaDB sqlite-vec decision",
-      "expectedSourceIds": ["claude:sanitized-memory-architecture"],
-      "expectedSnippets": ["Keep SQLite plus sqlite-vec", "Do not adopt ChromaDB"],
-      "seedMemories": [
-        {
-          "source": "claude-code-jsonl",
-          "source_id": "claude:sanitized-memory-architecture:exchange:2",
-          "memory_type": "coding_session_exchange",
-          "timestamp": "2026-04-29T13:45:00.000Z",
-          "cwd": "/repo/tools",
-          "content": "Q: Should Wall-E adopt ChromaDB from the reference project?\nA: Decision: Keep SQLite plus sqlite-vec. Do not adopt ChromaDB; port the source-adapter semantics and retrieval tests instead.",
-          "metadata": {
-            "sourceId": "claude:sanitized-memory-architecture",
-            "cwd": "/repo/tools",
-            "gitBranch": "feat/session-memory-protocol"
-          }
-        }
-      ]
-    }
-  },
-  {
-    "id": "profile-recall-001",
-    "prompt": "Use Wall-E memory to answer: what is the user's communication style and how should an agent adapt?",
-    "taskType": "memory-retrieval",
-    "difficulty": "medium",
-    "expectedTraits": ["uses Wall-E memory", "mentions direct", "mentions evidence", "mentions thorough verification"],
-    "tags": ["self-profile", "communication-style", "digital-twin"],
-    "retrieval": {
-      "query": "communication style direct evidence verification",
-      "expectedSourceIds": ["profile:sanitized-style"],
-      "expectedSnippets": ["direct", "evidence", "thorough verification"],
-      "seedMemories": [
-        {
-          "source": "ctm",
-          "source_id": "profile:sanitized-style",
-          "memory_type": "coding_session_user_message",
-          "timestamp": "2026-05-02T12:00:00.000Z",
-          "cwd": "/repo/tools",
-          "content": "The user's communication style is direct and evidence-based. They prefer thorough verification, concrete dates, clear tradeoffs, and concise summaries after implementation.",
-          "metadata": {
-            "sourceId": "profile:sanitized-style",
-            "cwd": "/repo/tools"
-          }
-        }
-      ]
-    }
-  },
-  {
-    "id": "person-recall-001",
-    "prompt": "Use Wall-E memory before public web: do you know Casey Example?",
-    "taskType": "memory-retrieval",
-    "difficulty": "medium",
-    "expectedTraits": ["uses Wall-E memory", "does not search public web first", "mentions colleague context", "cites memory evidence"],
-    "tags": ["person-lookup", "private-context", "digital-twin"],
-    "retrieval": {
-      "query": "Casey Example colleague planning strategy",
-      "expectedSourceIds": ["person:sanitized-casey"],
-      "expectedSnippets": ["Casey Example", "colleague", "planning"],
-      "seedMemories": [
-        {
-          "source": "slack",
-          "source_id": "person:sanitized-casey",
-          "memory_type": "message",
-          "timestamp": "2026-03-24T17:16:00.000Z",
-          "cwd": "/repo/tools",
-          "content": "Casey Example appears in the user's work context as a colleague involved in planning, team strategy, and prioritization discussions.",
-          "metadata": {
-            "sourceId": "person:sanitized-casey",
-            "privacy": "sanitized"
-          }
-        }
-      ]
-    }
-  }
-]

package/template/wall-e/eval/benchmarks/reasoning.json DELETED Viewed

@@ -1,82 +0,0 @@
-[
-  {
-    "id": "reasoning-001",
-    "prompt": "I need to choose between PostgreSQL and MongoDB for a new e-commerce application. The app will have complex product relationships, user reviews, and order history. What should I consider?",
-    "taskType": "reasoning",
-    "difficulty": "medium",
-    "expectedTraits": ["multiple options considered", "pros and cons", "considers use case fit", "conclusion"],
-    "tags": ["database", "architecture", "trade-offs"]
-  },
-  {
-    "id": "reasoning-002",
-    "prompt": "Our team of 5 developers needs to decide between monorepo and polyrepo for our microservices. We have 8 services, shared libraries, and deploy independently. Walk me through the decision.",
-    "taskType": "reasoning",
-    "difficulty": "medium",
-    "expectedTraits": ["multiple options considered", "pros and cons", "considers team size", "numbered steps", "conclusion"],
-    "tags": ["architecture", "devops", "trade-offs"]
-  },
-  {
-    "id": "reasoning-003",
-    "prompt": "A farmer needs to get a fox, a chicken, and a bag of grain across a river. The boat only holds the farmer and one item. The fox will eat the chicken if left alone, and the chicken will eat the grain if left alone. How does the farmer do it?",
-    "taskType": "reasoning",
-    "difficulty": "easy",
-    "expectedTraits": ["numbered steps", "correct solution", "explains constraints", "step-by-step"],
-    "tags": ["logic-puzzle", "classic"]
-  },
-  {
-    "id": "reasoning-004",
-    "prompt": "We're experiencing intermittent 502 errors on our production API. The errors happen about 5% of the time, seem random, and started after a deployment last week. Outline a systematic debugging approach.",
-    "taskType": "reasoning",
-    "difficulty": "medium",
-    "expectedTraits": ["numbered steps", "checks logs first", "considers deployment changes", "systematic approach", "mentions monitoring"],
-    "tags": ["debugging", "ops", "systematic"]
-  },
-  {
-    "id": "reasoning-005",
-    "prompt": "If all roses are flowers, and some flowers fade quickly, can we conclude that some roses fade quickly? Explain your reasoning step by step.",
-    "taskType": "reasoning",
-    "difficulty": "medium",
-    "expectedTraits": ["identifies logical fallacy", "step-by-step", "correct conclusion", "explains why"],
-    "tags": ["logic", "syllogism"]
-  },
-  {
-    "id": "reasoning-006",
-    "prompt": "Plan a migration strategy for moving a legacy PHP monolith to Node.js microservices. The monolith serves 10K RPM, has a MySQL database, and the team has 3 months. What's your phased approach?",
-    "taskType": "reasoning",
-    "difficulty": "hard",
-    "expectedTraits": ["numbered steps", "phased approach", "considers risk", "mentions strangler pattern", "realistic timeline"],
-    "tags": ["migration", "architecture", "planning"]
-  },
-  {
-    "id": "reasoning-007",
-    "prompt": "You have $10,000 to invest and want to balance growth with safety. You're 30 years old with stable income. Compare investing in index funds, individual stocks, bonds, and real estate REITs. What allocation would you recommend and why?",
-    "taskType": "reasoning",
-    "difficulty": "hard",
-    "expectedTraits": ["multiple options considered", "pros and cons", "considers age and risk", "specific allocation", "conclusion"],
-    "tags": ["finance", "decision-making", "trade-offs"]
-  },
-  {
-    "id": "reasoning-008",
-    "prompt": "Three switches in one room control three light bulbs in another room. You can only go to the bulb room once. How do you figure out which switch controls which bulb?",
-    "taskType": "reasoning",
-    "difficulty": "easy",
-    "expectedTraits": ["correct solution", "uses heat trick", "numbered steps", "explains logic"],
-    "tags": ["logic-puzzle", "classic"]
-  },
-  {
-    "id": "reasoning-009",
-    "prompt": "Our startup needs to decide between building our own authentication system vs using Auth0 vs Firebase Auth. We have 2 developers, expect to reach 50K users in year one, and need SSO support for enterprise clients. Analyze the options.",
-    "taskType": "reasoning",
-    "difficulty": "medium",
-    "expectedTraits": ["multiple options considered", "pros and cons", "considers team size", "considers scale", "conclusion"],
-    "tags": ["architecture", "build-vs-buy", "trade-offs"]
-  },
-  {
-    "id": "reasoning-010",
-    "prompt": "A company has 100 employees. 60% use Mac, 50% use Chrome, and 30% use both Mac and Chrome. If you pick a random employee, what is the probability they use neither Mac nor Chrome? Show your work.",
-    "taskType": "reasoning",
-    "difficulty": "medium",
-    "expectedTraits": ["shows calculation", "uses inclusion-exclusion", "correct answer", "step-by-step"],
-    "tags": ["math", "probability", "set-theory"]
-  }
-]

package/template/wall-e/eval/benchmarks/swebench-lite-30.json DELETED Viewed

@@ -1,212 +0,0 @@
-[
-  {
-    "instance_id": "django__django-11099",
-    "repo": "django/django",
-    "difficulty": "easy",
-    "category": "bug-fix",
-    "description": "UsernameValidator allows trailing newline"
-  },
-  {
-    "instance_id": "django__django-11283",
-    "repo": "django/django",
-    "difficulty": "easy",
-    "category": "bug-fix",
-    "description": "Migration auth.0011 is not backward compatible"
-  },
-  {
-    "instance_id": "django__django-12708",
-    "repo": "django/django",
-    "difficulty": "medium",
-    "category": "bug-fix",
-    "description": "Migration optimizer does not reduce multiple AlterField"
-  },
-  {
-    "instance_id": "flask__flask-4045",
-    "repo": "pallets/flask",
-    "difficulty": "easy",
-    "category": "bug-fix",
-    "description": "Raise error when blueprint name contains a dot"
-  },
-  {
-    "instance_id": "flask__flask-4992",
-    "repo": "pallets/flask",
-    "difficulty": "medium",
-    "category": "enhancement",
-    "description": "Add --app and --debug options to flask CLI"
-  },
-  {
-    "instance_id": "requests__requests-3362",
-    "repo": "psf/requests",
-    "difficulty": "easy",
-    "category": "bug-fix",
-    "description": "Allow setting verify=False in session"
-  },
-  {
-    "instance_id": "requests__requests-1963",
-    "repo": "psf/requests",
-    "difficulty": "medium",
-    "category": "bug-fix",
-    "description": "Non-conforming HTTP header causes UnicodeError"
-  },
-  {
-    "instance_id": "sympy__sympy-13146",
-    "repo": "sympy/sympy",
-    "difficulty": "easy",
-    "category": "bug-fix",
-    "description": "Exponent printing issue in pretty printer"
-  },
-  {
-    "instance_id": "sympy__sympy-13471",
-    "repo": "sympy/sympy",
-    "difficulty": "medium",
-    "category": "bug-fix",
-    "description": "Python code generation not supporting user functions"
-  },
-  {
-    "instance_id": "sympy__sympy-15345",
-    "repo": "sympy/sympy",
-    "difficulty": "hard",
-    "category": "bug-fix",
-    "description": "Mathematica code printer does not handle Max/Min properly"
-  },
-  {
-    "instance_id": "scikit-learn__scikit-learn-10297",
-    "repo": "scikit-learn/scikit-learn",
-    "difficulty": "easy",
-    "category": "bug-fix",
-    "description": "linear_model.RidgeClassifierCV default store_cv_values incompatible"
-  },
-  {
-    "instance_id": "scikit-learn__scikit-learn-13142",
-    "repo": "scikit-learn/scikit-learn",
-    "difficulty": "medium",
-    "category": "bug-fix",
-    "description": "GaussianMixture predict and predict_proba inconsistent"
-  },
-  {
-    "instance_id": "scikit-learn__scikit-learn-13779",
-    "repo": "scikit-learn/scikit-learn",
-    "difficulty": "hard",
-    "category": "bug-fix",
-    "description": "Voting estimator does not work with set_params"
-  },
-  {
-    "instance_id": "matplotlib__matplotlib-23314",
-    "repo": "matplotlib/matplotlib",
-    "difficulty": "easy",
-    "category": "bug-fix",
-    "description": "set_visible on legend not working"
-  },
-  {
-    "instance_id": "matplotlib__matplotlib-23476",
-    "repo": "matplotlib/matplotlib",
-    "difficulty": "medium",
-    "category": "bug-fix",
-    "description": "Axes3D.scatter ignores depthshade keyword argument"
-  },
-  {
-    "instance_id": "matplotlib__matplotlib-25332",
-    "repo": "matplotlib/matplotlib",
-    "difficulty": "hard",
-    "category": "bug-fix",
-    "description": "Axes.set_ticks behavior with labels parameter"
-  },
-  {
-    "instance_id": "astropy__astropy-6938",
-    "repo": "astropy/astropy",
-    "difficulty": "easy",
-    "category": "bug-fix",
-    "description": "Possible issue in io.fits with uint conversion"
-  },
-  {
-    "instance_id": "astropy__astropy-14182",
-    "repo": "astropy/astropy",
-    "difficulty": "hard",
-    "category": "bug-fix",
-    "description": "Inconsistent handling of empty in ASCII table"
-  },
-  {
-    "instance_id": "sphinx-doc__sphinx-8273",
-    "repo": "sphinx-doc/sphinx",
-    "difficulty": "medium",
-    "category": "bug-fix",
-    "description": "Generate man page section directories"
-  },
-  {
-    "instance_id": "sphinx-doc__sphinx-8474",
-    "repo": "sphinx-doc/sphinx",
-    "difficulty": "hard",
-    "category": "enhancement",
-    "description": "Check if 'rst_prolog' affects domain directives"
-  },
-  {
-    "instance_id": "pylint-dev__pylint-7080",
-    "repo": "pylint-dev/pylint",
-    "difficulty": "easy",
-    "category": "bug-fix",
-    "description": "Regex pattern does not match for ignore-paths"
-  },
-  {
-    "instance_id": "pylint-dev__pylint-7114",
-    "repo": "pylint-dev/pylint",
-    "difficulty": "medium",
-    "category": "bug-fix",
-    "description": "Linting fails on __init__ constructor with too-many-function-args"
-  },
-  {
-    "instance_id": "pytest-dev__pytest-5221",
-    "repo": "pytest-dev/pytest",
-    "difficulty": "easy",
-    "category": "bug-fix",
-    "description": "Display hint when 'python -m pytest' warning is shown"
-  },
-  {
-    "instance_id": "pytest-dev__pytest-5692",
-    "repo": "pytest-dev/pytest",
-    "difficulty": "medium",
-    "category": "bug-fix",
-    "description": "Hostname and timestamp not added to Junit XML report"
-  },
-  {
-    "instance_id": "pytest-dev__pytest-7373",
-    "repo": "pytest-dev/pytest",
-    "difficulty": "hard",
-    "category": "bug-fix",
-    "description": "Incorrect caching of directories with --import-mode=importlib"
-  },
-  {
-    "instance_id": "django__django-13230",
-    "repo": "django/django",
-    "difficulty": "hard",
-    "category": "bug-fix",
-    "description": "Add support for item_comments to syndication framework"
-  },
-  {
-    "instance_id": "django__django-13710",
-    "repo": "django/django",
-    "difficulty": "hard",
-    "category": "bug-fix",
-    "description": "Use admin_order_field for related fields in ModelAdmin"
-  },
-  {
-    "instance_id": "sympy__sympy-20442",
-    "repo": "sympy/sympy",
-    "difficulty": "hard",
-    "category": "bug-fix",
-    "description": "convert_to not working for composites of other units"
-  },
-  {
-    "instance_id": "scikit-learn__scikit-learn-14983",
-    "repo": "scikit-learn/scikit-learn",
-    "difficulty": "hard",
-    "category": "enhancement",
-    "description": "RepeatedKFold and RepeatedStratifiedKFold do not show correct __repr__"
-  },
-  {
-    "instance_id": "matplotlib__matplotlib-23562",
-    "repo": "matplotlib/matplotlib",
-    "difficulty": "hard",
-    "category": "bug-fix",
-    "description": "mpl_toolkits.mplot3d.art3d.Poly3DCollection set_verts problem"
-  }
-]