PyPI - ifcraftcorpus - Versions diffs - 1.1.0__py3-none-any.whl → 1.2.1__py3-none-any.whl - Mend

ifcraftcorpus 1.1.0__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (65) hide show

ifcraftcorpus/search.py CHANGED Viewed

@@ -40,13 +40,26 @@ Classes:
 from __future__ import annotations
+import logging
 from dataclasses import dataclass
 from pathlib import Path
-from typing import TYPE_CHECKING, Literal
+from typing import TYPE_CHECKING, Any, Literal
 from ifcraftcorpus.index import CorpusIndex
+logger = logging.getLogger(__name__)
+def _truncate(value: str, limit: int = 120) -> str:
+    """Trim long query strings for readable logging."""
+    if len(value) <= limit:
+        return value
+    return f"{value[:limit]}..."
 if TYPE_CHECKING:
+    from ifcraftcorpus.embeddings import EmbeddingIndex
     from ifcraftcorpus.providers import EmbeddingProvider
@@ -190,7 +203,15 @@ class Corpus:
         self._use_bundled = use_bundled
         self._fts_index: CorpusIndex | None = None
-        self._embedding_index = None  # Lazy loaded
+        self._embedding_index: EmbeddingIndex | None = None  # Lazy loaded
+        logger.debug(
+            "Corpus init corpus_dir=%s index_path=%s embeddings_path=%s use_bundled=%s",
+            corpus_dir,
+            index_path,
+            embeddings_path,
+            use_bundled,
+        )
     def _get_corpus_dir(self) -> Path:
         """Get the corpus directory path.
@@ -202,6 +223,7 @@ class Corpus:
             ValueError: If no corpus directory can be found.
         """
         if self._corpus_dir:
+            logger.debug("Using provided corpus directory: %s", self._corpus_dir)
             return self._corpus_dir
         if self._use_bundled:
@@ -214,15 +236,17 @@ class Corpus:
                 # Check for installed shared data (pip install)
                 bundled = Path(sys.prefix) / "share" / "ifcraftcorpus" / "corpus"
                 if bundled.exists():
+                    logger.debug("Using bundled corpus directory: %s", bundled)
                     return bundled
                 # Check relative to package (development mode / editable install)
                 pkg_dir = Path(ifcraftcorpus.__file__).parent
                 dev_corpus = pkg_dir.parent.parent / "corpus"
                 if dev_corpus.exists():
+                    logger.debug("Using development corpus directory: %s", dev_corpus)
                     return dev_corpus
             except Exception:
-                pass
+                logger.debug("Failed to auto-detect bundled corpus directory", exc_info=True)
         raise ValueError(
             "No corpus directory found. Provide corpus_dir or install package with bundled corpus."
@@ -239,15 +263,17 @@ class Corpus:
         """
         if self._fts_index is None:
             if self._index_path and self._index_path.exists():
+                logger.debug("Loading corpus index from %s", self._index_path)
                 self._fts_index = CorpusIndex(self._index_path)
             else:
                 # Build in-memory index
-                self._fts_index = CorpusIndex()
                 corpus_dir = self._get_corpus_dir()
+                logger.debug("Building in-memory corpus index from %s", corpus_dir)
+                self._fts_index = CorpusIndex()
                 self._fts_index.build_from_directory(corpus_dir)
         return self._fts_index
-    def _get_embedding_index(self):
+    def _get_embedding_index(self) -> EmbeddingIndex | None:
         """Get the embedding index for semantic search.
         Lazily loads the embedding index if embeddings_path was provided.
@@ -258,6 +284,7 @@ class Corpus:
             EmbeddingIndex instance or None if unavailable.
         """
         if self._embedding_index is None and self._embeddings_path:
+            logger.debug("Attempting to load embeddings from %s", self._embeddings_path)
             try:
                 from ifcraftcorpus.embeddings import EmbeddingIndex
@@ -269,7 +296,9 @@ class Corpus:
                         self._embeddings_path, provider=self._embedding_provider
                     )
             except ImportError:
-                pass  # embeddings not available
+                logger.debug("Embedding support not installed", exc_info=True)
+        elif self._embedding_index is None and not self._embeddings_path:
+            logger.debug("No embeddings path configured; semantic search disabled")
         return self._embedding_index
     def build_embeddings(self, *, force: bool = False) -> int:
@@ -309,12 +338,14 @@ class Corpus:
             and self._embeddings_path.exists()
             and (self._embeddings_path / "metadata.json").exists()
         ):
+            logger.info("Embeddings already exist at %s; skipping rebuild", self._embeddings_path)
             return 0
         from ifcraftcorpus.embeddings import EmbeddingIndex
         embedding_index = EmbeddingIndex(provider=self._embedding_provider)
+        logger.info("Building embeddings into %s", self._embeddings_path)
         count = 0
         for doc_info in self.list_documents():
             doc = self.get_document(doc_info["name"])
@@ -359,6 +390,7 @@ class Corpus:
         embedding_index.save(self._embeddings_path)
         self._embedding_index = embedding_index
+        logger.info("Saved embeddings (%s items) to %s", count, self._embeddings_path)
         return count
     def search(
@@ -407,6 +439,14 @@ class Corpus:
             >>> # Semantic search (if embeddings available)
             >>> results = corpus.search("scary atmosphere", mode="semantic")
         """
+        logger.debug(
+            "Corpus.search query=%r cluster=%s limit=%s mode=%s",
+            _truncate(query),
+            cluster,
+            limit,
+            mode,
+        )
         results: list[CorpusResult] = []
         if mode in ("keyword", "hybrid"):
@@ -445,18 +485,26 @@ class Corpus:
         # Deduplicate and sort by score
         if mode == "hybrid":
-            seen = set()
-            unique_results = []
-            for r in sorted(results, key=lambda x: x.score, reverse=True):
-                key = (r.document_name, r.section_heading)
+            seen: set[tuple[str, str | None]] = set()
+            unique_results: list[CorpusResult] = []
+            sorted_results: list[CorpusResult] = sorted(
+                results, key=lambda x: x.score, reverse=True
+            )
+            for result in sorted_results:
+                key = (result.document_name, result.section_heading)
                 if key not in seen:
                     seen.add(key)
-                    unique_results.append(r)
+                    unique_results.append(result)
             results = unique_results[:limit]
+        logger.debug(
+            "Corpus.search returning %s results (mode=%s)",
+            len(results),
+            mode,
+        )
         return results
-    def get_document(self, name: str) -> dict | None:
+    def get_document(self, name: str) -> dict[str, Any] | None:
         """Get a document by name with all its sections.
         Retrieves complete document data including metadata and all

{ifcraftcorpus-1.1.0.data → ifcraftcorpus-1.2.1.data}/data/share/ifcraftcorpus/corpus/agent-design/agent_prompt_engineering.md RENAMED Viewed

@@ -285,6 +285,70 @@ Small models may interpret as "never validate" or "always validate."
 ---
+## Sampling Parameters
+Sampling parameters control the randomness and diversity of LLM outputs. The two most important are **temperature** and **top_p**. These can be set per API call, enabling different settings for different phases of a workflow.
+### Temperature
+Temperature controls the probability distribution over tokens. Lower values make the model more deterministic; higher values increase randomness and creativity.
+| Temperature | Effect | Use Cases |
+|-------------|--------|-----------|
+| 0.0–0.2 | Highly deterministic, consistent | Structured output, tool calling, factual responses |
+| 0.3–0.5 | Balanced, slight variation | General conversation, summarization |
+| 0.6–0.8 | More creative, diverse | Brainstorming, draft generation |
+| 0.9–1.0+ | High randomness, exploratory | Creative writing, idea exploration, poetry |
+**How it works:** Temperature divides the logits (pre-softmax scores) by T before sampling. As T approaches 0, the distribution collapses onto the highest-probability token; most APIs treat T=0 as greedy decoding and always pick that token. At T>1, probability differences flatten, making unlikely tokens more probable.
+**Caveats:**
+- Even T=0 isn't fully deterministic—hardware concurrency and floating-point variations can introduce tiny differences
+- High temperature increases hallucination risk
+- Temperature interacts with top_p; tuning both simultaneously requires care
+### Top_p (Nucleus Sampling)
+Top_p limits sampling to the smallest set of highest-probability tokens whose cumulative probability reaches or exceeds p; tokens outside this "nucleus" are excluded from sampling. This provides a different control over diversity than temperature.
+| Top_p | Effect |
+|-------|--------|
+| 0.1–0.3 | Very focused, few token choices |
+| 0.5–0.7 | Moderate diversity |
+| 0.9–1.0 | Wide sampling, more variation |
+**Temperature vs Top_p:**
+- Temperature reshapes the *entire* distribution: every token remains a candidate, but probabilities sharpen (low T) or flatten (high T)
+- Top_p dynamically adjusts the candidate pool based on probability mass
+- For most use cases, adjust one and leave the other at default
+- Common pattern: low temperature (0.0–0.3) with top_p=1.0 for structured tasks
+### Provider Temperature Ranges
+| Provider | Range | Default | Notes |
+|----------|-------|---------|-------|
+| OpenAI | 0.0–2.0 | 1.0 | Values >1.0 increase randomness significantly |
+| Anthropic | 0.0–1.0 | 1.0 | Cannot exceed 1.0 |
+| Gemini | 0.0–2.0 | 1.0 | Similar to OpenAI |
+| Ollama | 0.0–1.0+ | 0.7–0.8 | Model-dependent defaults |
+### Phase-Specific Temperature
+Since temperature can be set per API call, use different values for different workflow phases:
+| Phase | Temperature | Rationale |
+|-------|-------------|-----------|
+| Brainstorming/Discuss | 0.7–1.0 | Encourage diverse ideas, exploration |
+| Planning/Freeze | 0.3–0.5 | Balance creativity with coherence |
+| Serialize/Tool calls | 0.0–0.2 | Maximize format compliance |
+| Validation repair | 0.0–0.2 | Deterministic corrections |
+This is particularly relevant for the **Discuss → Freeze → Serialize** pattern described below—each stage benefits from different temperature settings.
+---
 ## Structured Output Pipelines
 Many agent tasks end in a **strict artifact**—JSON/YAML configs, story plans, outlines—rather than free-form prose. Trying to get both *conversation* and *perfectly formatted output* from a single response is brittle, especially for small/local models.
@@ -297,21 +361,23 @@ A more reliable approach is to separate the flow into stages:
 ### Discuss → Freeze → Serialize
-**Discuss:** keep prompts focused on meaning, not field names. Explicitly tell the model *not* to output JSON/YAML during this phase.
+**Discuss** (temperature 0.7–1.0): Keep prompts focused on meaning, not field names. Explicitly tell the model *not* to output JSON/YAML during this phase. Higher temperature encourages diverse ideas and creative exploration.
-**Freeze:** compress decisions into a short summary:
+**Freeze** (temperature 0.3–0.5): Compress decisions into a short summary:
 - 10–30 bullets, one decision per line.
 - No open questions, only resolved choices.
 - Structured enough that a smaller model can follow it reliably.
+- Moderate temperature balances coherence with flexibility.
-**Serialize:** in a separate call:
+**Serialize** (temperature 0.0–0.2): In a separate call:
 - Provide the schema (JSON Schema, typed model, or tool definition).
-- Instruct: *“Output only JSON that matches this schema. No prose, no markdown fences.”*
+- Instruct: *"Output only JSON that matches this schema. No prose, no markdown fences."*
 - Use constrained decoding/tool calling where available.
+- Low temperature maximizes format compliance.
-This separates conversational drift from serialization, which significantly improves reliability for structured outputs like story plans, world-bible slices, or configuration objects.
+This separates conversational drift from serialization, which significantly improves reliability for structured outputs like story plans, world-bible slices, or configuration objects. The temperature gradient—high for exploration, low for precision—matches each phase's purpose.
 ### Tool-Gated Finalization
@@ -363,7 +429,108 @@ When a candidate fails validation, the repair prompt should:
 > “Return a corrected JSON object that fixes **only** these errors. Do not change fields that are not mentioned. Output only JSON.”
-For small models, keep error descriptions compact and concrete rather than abstract (“string too long: 345 > max 200”).
+For small models, keep error descriptions compact and concrete rather than abstract ("string too long: 345 > max 200").
+### Structured Validation Feedback
+Rather than returning free-form error messages, use a structured feedback format that leverages attention patterns (status first, action last) and distinguishes error types clearly.
+**Result Categories**
+Use a semantic result enum rather than boolean success/failure:
+| Result | Meaning | Model Action |
+|--------|---------|--------------|
+| `accepted` | Validation passed, artifact stored | Proceed to next step |
+| `validation_failed` | Content issues the model can fix | Repair and resubmit |
+| `tool_error` | Infrastructure failure | Retry unchanged or escalate |
+This distinction matters: `validation_failed` tells the model its *content* was wrong (fixable), while `tool_error` indicates the tool itself failed (retry or give up).
+**Error Categorization**
+Group validation errors by type to help the model understand what went wrong:
+```json
+{
+  "result": "validation_failed",
+  "issues": {
+    "invalid": [
+      {"field": "estimated_passages", "value": 15, "requirement": "must be 1-10"}
+    ],
+    "missing": ["protagonist_name", "setting"],
+    "unknown": ["passages"]
+  },
+  "issue_count": {"invalid": 1, "missing": 2, "unknown": 1},
+  "action": "Fix the 4 issues above and resubmit. Use exact field names from the schema."
+}
+```
+| Category | Meaning | Common Cause |
+|----------|---------|--------------|
+| `invalid` | Field present but value wrong | Constraint violation, wrong type |
+| `missing` | Required field not provided | Omission, incomplete output |
+| `unknown` | Field not in schema | Typo, hallucinated field name |
+The `unknown` category is particularly valuable—it catches near-misses like `passages` instead of `estimated_passages` that would otherwise appear as "missing" with no hint about the typo.
+**Field Ordering (Primacy/Recency)**
+Structure feedback to exploit the U-shaped attention curve:
+1. **Result status** (first—immediate orientation)
+2. **Issues by category** (middle—detailed content)
+3. **Issue count** (severity summary)
+4. **Action instructions** (last—what to do next)
+**What NOT to Include**
+| Avoid | Why |
+|-------|-----|
+| Full schema | Already in tool definition; wastes tokens in retry loops |
+| Boolean `success` field | Ambiguous; use semantic result categories instead |
+| Generic hints | Replace with actionable, field-specific instructions |
+| Valid fields | Only describe what failed, not what succeeded |
+**Example: Before and After**
+Anti-pattern (vague, wastes tokens):
+```
+Error: Validation failed. Expected fields: type, title, protagonist_name,
+setting, theme, estimated_passages, tone. Please check your submission
+and ensure all required fields are present with valid values.
+```
+Better (specific, actionable):
+```json
+{
+  "result": "validation_failed",
+  "issues": {
+    "invalid": [{"field": "type", "value": "story", "requirement": "must be 'dream'"}],
+    "missing": ["protagonist_name"],
+    "unknown": ["passages"]
+  },
+  "action": "Fix these 3 issues. Did you mean 'estimated_passages' instead of 'passages'?"
+}
+```
+The improved version:
+- Names the exact fields that failed
+- Suggests the likely typo (`passages` → `estimated_passages`)
+- Doesn't repeat schema information already available to the model
+- Ends with a clear action instruction, placed last to exploit recency
+### Retry Budget and Token Efficiency
+Validation loops consume tokens. Design for efficiency:
+- **Cap retries**: 2-3 attempts is usually sufficient; more indicates a prompt or schema problem
+- **Escalate gracefully**: After the retry budget is exhausted, surface a clear failure rather than looping
+- **Track retry rates**: High retry rates signal opportunities for prompt improvement or schema simplification
+- **Consider model capability**: Less capable models may need higher retry budgets but with simpler feedback
 ### Best Practices
@@ -528,9 +695,12 @@ Before deploying:
 ## Provider-Specific Optimizations
-- **Anthropic**: Use `token-efficient-tools` beta header for up to 70% output token reduction
-- **OpenAI**: Consider fine-tuning for frequently-used patterns
-- **Local models**: Tool retrieval essential—small models struggle with 10+ tools
+- **Anthropic**: Use `token-efficient-tools` beta header for up to 70% output token reduction; temperature capped at 1.0
+- **OpenAI**: Consider fine-tuning for frequently-used patterns; temperature range 0.0–2.0
+- **Gemini**: Temperature range 0.0–2.0, similar behavior to OpenAI
+- **Ollama/Local**: Tool retrieval essential—small models struggle with 10+ tools; default temperature varies by model (typically 0.7–0.8)
+See [Sampling Parameters](#sampling-parameters) for detailed temperature guidance by use case.
 ---
@@ -549,6 +719,8 @@ Before deploying:
 | Dynamic few-shot | Static example bloat | Retrieve relevant examples |
 | Reflection | Quality failures | Draft → critique → refine |
 | Context pruning | Context rot | Summarize and remove stale turns |
+| Structured feedback | Vague validation errors | Categorize issues (invalid/missing/unknown) |
+| Phase-specific temperature | Format errors in structured output | High temp for discuss, low for serialize |
 | Model Class | Max Prompt | Max Tools | Strategy |
 |-------------|------------|-----------|----------|
@@ -567,6 +739,8 @@ Before deploying:
 | RAG-MCP (2025) | Two-stage selection reduces tokens 50%+, improves accuracy 3x |
 | Anthropic Token-Efficient Tools | Schema optimization reduces output tokens 70% |
 | Reflexion research | Self-correction improves quality on complex tasks |
+| STROT Framework (2025) | Structured feedback loops achieve 95% first-attempt success |
+| AWS Evaluator-Optimizer | Semantic reflection enables self-improving validation |
 ---

ifcraftcorpus-1.2.1.data/data/share/ifcraftcorpus/subagents/README.md ADDED Viewed

@@ -0,0 +1,198 @@
+# IF Craft Corpus Subagents
+Specialized agent templates for Interactive Fiction authoring workflows. These templates provide system prompts for LLM agents that can assist with different aspects of IF creation.
+## Overview
+The subagents follow a **hub-and-spoke orchestration pattern** where specialized agents handle specific tasks:
+| Agent | Archetype | Role |
+|-------|-----------|------|
+| **Story Architect** | Orchestrator | Plans narrative structure, decomposes projects, coordinates creation |
+| **Prose Writer** | Creator | Writes narrative prose, dialogue, and scene text |
+| **Quality Reviewer** | Validator | Reviews content for quality, consistency, and standards |
+| **Genre Consultant** | Researcher | Provides genre-specific guidance on conventions and tropes |
+| **World Curator** | Curator | Maintains world consistency, manages canon |
+| **Platform Advisor** | Researcher | Guides tool/platform selection and technical implementation |
+## Usage
+### Via MCP Prompts (Recommended)
+When using the IF Craft Corpus MCP server, subagents are exposed as **prompts** that can be retrieved and used as system prompts for agents:
+```python
+# Using FastMCP client
+from fastmcp import Client
+async with Client("ifcraftcorpus-mcp") as client:
+    # List available subagents
+    prompts = await client.list_prompts()
+    # Get a specific prompt
+    result = await client.get_prompt(
+        "if_story_architect",
+        arguments={"project_name": "My IF Game", "genre": "mystery"}
+    )
+    # Use the prompt content as a system prompt
+    system_prompt = result.messages[0].content.text
+```
+### Via MCP Tool
+You can also use the `list_subagents` tool to discover available agents:
+```python
+subagents = await client.call_tool("list_subagents")
+# Returns list of agents with name, description, archetype, and parameters
+```
+### Direct File Access
+The markdown templates can also be read directly:
+```python
+from pathlib import Path
+# In development
+template = Path("subagents/if_prose_writer.md").read_text()
+# In installed package
+import sys
+template_path = Path(sys.prefix) / "share" / "ifcraftcorpus" / "subagents" / "if_prose_writer.md"
+template = template_path.read_text()
+```
+## Agent Details
+### IF Story Architect
+**Archetype:** Orchestrator
+**Parameters:** `project_name`, `genre`
+Plans and coordinates IF projects without writing content itself. Responsibilities:
+- Design narrative topology (time cave, branch-and-bottleneck, QBN, etc.)
+- Decompose projects into scenes and branches
+- Plan emotional arcs across branches
+- Create scene briefs for content creators
+**When to use:** At project start to plan structure, or when restructuring.
+---
+### IF Prose Writer
+**Archetype:** Creator
+**Parameters:** `genre`, `pov`
+Creates narrative content from briefs. Responsibilities:
+- Write scene prose and dialogue
+- Maintain character voice consistency
+- Handle POV and exposition
+- Create choice text
+**When to use:** For actual content creation from scene briefs.
+---
+### IF Quality Reviewer
+**Archetype:** Validator
+**Parameters:** `focus_areas`
+Reviews content for quality issues. Responsibilities:
+- Check structural integrity (orphaned content, dead ends)
+- Verify voice and style consistency
+- Validate canon and continuity
+- Audit accessibility compliance
+**When to use:** After content creation, before publishing.
+---
+### IF Genre Consultant
+**Archetype:** Researcher
+**Parameters:** `primary_genre`, `secondary_genre`
+Provides genre-specific guidance. Responsibilities:
+- Explain genre conventions and expectations
+- Suggest appropriate tropes and subversions
+- Advise on cross-genre blending
+- Guide tone and style
+**When to use:** During planning, or when genre questions arise.
+---
+### IF World Curator
+**Archetype:** Curator
+**Parameters:** `world_name`, `setting_type`
+Maintains world consistency. Responsibilities:
+- Track canon facts across branches
+- Manage timeline and character states
+- Flag contradictions
+- Maintain world bible
+**When to use:** Throughout the project to maintain consistency.
+---
+### IF Platform Advisor
+**Archetype:** Researcher
+**Parameters:** `target_platform`, `team_size`
+Guides technical decisions. Responsibilities:
+- Compare IF platforms (Twine, Ink, ChoiceScript, etc.)
+- Recommend tools based on project needs
+- Advise on workflow and collaboration
+- Guide integration strategies
+**When to use:** At project start for platform selection, or when evaluating tools.
+## Corpus Integration
+All subagents are designed to use the IF Craft Corpus MCP tools:
+- `search_corpus(query, cluster?, limit?)` - Find relevant guidance
+- `get_document(name)` - Retrieve full document
+- `list_documents(cluster?)` - Discover available guidance
+Each template includes guidance on which corpus clusters are most relevant for that agent's work.
+## Web Research
+Subagents are also encouraged to use web search for:
+- Historical/factual accuracy
+- Current platform documentation
+- Published IF examples
+- Domain-specific knowledge
+## Design Principles
+These templates follow patterns from the corpus's own agent design documents:
+1. **Sandwich Pattern** - Critical constraints at start AND end of prompt
+2. **Menu + Consult** - Summary in prompt, retrieve details on demand
+3. **Clear Archetypes** - Each agent has a defined role and boundaries
+4. **Neutral Tool Descriptions** - Descriptive, not prescriptive
+## Extending
+To create custom subagents:
+1. Copy an existing template as a starting point
+2. Modify the role, responsibilities, and workflow sections
+3. Update the corpus cluster references for your agent's domain
+4. Add any custom output formats needed
+5. Register as an MCP prompt if desired
+## License
+These templates are part of the IF Craft Corpus package:
+- **Code**: MIT License
+- **Content**: CC-BY-4.0

ifcraftcorpus 1.1.0__py3-none-any.whl → 1.2.1__py3-none-any.whl

ifcraftcorpus 1.1.0__py3-none-any.whl → 1.2.1__py3-none-any.whl