ifcraftcorpus 1.1.0__tar.gz → 1.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/PKG-INFO +18 -1
  2. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/README.md +17 -0
  3. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/agent-design/agent_prompt_engineering.md +183 -9
  4. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/pyproject.toml +14 -1
  5. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/src/ifcraftcorpus/cli.py +54 -5
  6. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/src/ifcraftcorpus/embeddings.py +11 -7
  7. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/src/ifcraftcorpus/index.py +26 -4
  8. ifcraftcorpus-1.2.1/src/ifcraftcorpus/logging_utils.py +84 -0
  9. ifcraftcorpus-1.2.1/src/ifcraftcorpus/mcp_server.py +806 -0
  10. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/src/ifcraftcorpus/providers.py +4 -4
  11. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/src/ifcraftcorpus/search.py +60 -12
  12. ifcraftcorpus-1.2.1/subagents/README.md +198 -0
  13. ifcraftcorpus-1.2.1/subagents/if_genre_consultant.md +257 -0
  14. ifcraftcorpus-1.2.1/subagents/if_platform_advisor.md +306 -0
  15. ifcraftcorpus-1.2.1/subagents/if_prose_writer.md +187 -0
  16. ifcraftcorpus-1.2.1/subagents/if_quality_reviewer.md +245 -0
  17. ifcraftcorpus-1.2.1/subagents/if_story_architect.md +162 -0
  18. ifcraftcorpus-1.2.1/subagents/if_world_curator.md +280 -0
  19. ifcraftcorpus-1.1.0/src/ifcraftcorpus/mcp_server.py +0 -410
  20. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/.gitignore +0 -0
  21. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/LICENSE +0 -0
  22. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/LICENSE-CONTENT +0 -0
  23. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/agent-design/multi_agent_patterns.md +0 -0
  24. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/audience-and-access/accessibility_guidelines.md +0 -0
  25. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/audience-and-access/audience_targeting.md +0 -0
  26. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/audience-and-access/localization_considerations.md +0 -0
  27. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/audio_visual_integration.md +0 -0
  28. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/collaborative_if_writing.md +0 -0
  29. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/creative_workflow_pipeline.md +0 -0
  30. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/diegetic_design.md +0 -0
  31. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/idea_capture_and_hooks.md +0 -0
  32. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/if_platform_tools.md +0 -0
  33. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/player_analytics_metrics.md +0 -0
  34. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/quality_standards_if.md +0 -0
  35. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/research_and_verification.md +0 -0
  36. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/testing_interactive_fiction.md +0 -0
  37. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/emotional-design/conflict_patterns.md +0 -0
  38. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/emotional-design/emotional_beats.md +0 -0
  39. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/game-design/mechanics_design_patterns.md +0 -0
  40. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/genre-conventions/children_and_ya_conventions.md +0 -0
  41. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/genre-conventions/fantasy_conventions.md +0 -0
  42. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/genre-conventions/historical_fiction.md +0 -0
  43. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/genre-conventions/horror_conventions.md +0 -0
  44. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/genre-conventions/mystery_conventions.md +0 -0
  45. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/genre-conventions/sci_fi_conventions.md +0 -0
  46. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/branching_narrative_construction.md +0 -0
  47. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/branching_narrative_craft.md +0 -0
  48. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/endings_patterns.md +0 -0
  49. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/episodic_serialized_if.md +0 -0
  50. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/nonlinear_structure.md +0 -0
  51. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/pacing_and_tension.md +0 -0
  52. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/romance_and_relationships.md +0 -0
  53. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/scene_structure_and_beats.md +0 -0
  54. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/scene_transitions.md +0 -0
  55. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/prose-and-language/character_voice.md +0 -0
  56. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/prose-and-language/dialogue_craft.md +0 -0
  57. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/prose-and-language/exposition_techniques.md +0 -0
  58. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/prose-and-language/narrative_point_of_view.md +0 -0
  59. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/prose-and-language/prose_patterns.md +0 -0
  60. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/prose-and-language/subtext_and_implication.md +0 -0
  61. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/prose-and-language/voice_register_consistency.md +0 -0
  62. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/scope-and-planning/scope_and_length.md +0 -0
  63. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/world-and-setting/canon_management.md +0 -0
  64. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/world-and-setting/setting_as_character.md +0 -0
  65. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/world-and-setting/worldbuilding_patterns.md +0 -0
  66. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/src/ifcraftcorpus/__init__.py +0 -0
  67. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/src/ifcraftcorpus/parser.py +0 -0
  68. {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/src/ifcraftcorpus/py.typed +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ifcraftcorpus
3
- Version: 1.1.0
3
+ Version: 1.2.1
4
4
  Summary: Interactive fiction craft corpus with search library and MCP server
5
5
  Project-URL: Homepage, https://pvliesdonk.github.io/if-craft-corpus
6
6
  Project-URL: Repository, https://github.com/pvliesdonk/if-craft-corpus
@@ -124,6 +124,23 @@ results = corpus.search(
124
124
  | agent-design | 2 | Multi-agent patterns, prompt engineering |
125
125
  | game-design | 1 | Mechanics design patterns |
126
126
 
127
+ ## Verbose Logging
128
+
129
+ Set `LOG_LEVEL` (e.g., `INFO`, `DEBUG`) or the convenience flag `VERBOSE=1`
130
+ before launching `ifcraftcorpus`, `ifcraftcorpus-mcp`, or the Docker image to
131
+ emit detailed logs to stderr. Example:
132
+
133
+ ```bash
134
+ LOG_LEVEL=DEBUG ifcraftcorpus-mcp
135
+
136
+ # Docker
137
+ docker run -p 8000:8000 \
138
+ -e LOG_LEVEL=DEBUG \
139
+ ghcr.io/pvliesdonk/if-craft-corpus
140
+ ```
141
+
142
+ Logs never touch stdout, so stdio transports remain compatible.
143
+
127
144
  ## Documentation
128
145
 
129
146
  Full documentation: https://pvliesdonk.github.io/if-craft-corpus
@@ -71,6 +71,23 @@ results = corpus.search(
71
71
  | agent-design | 2 | Multi-agent patterns, prompt engineering |
72
72
  | game-design | 1 | Mechanics design patterns |
73
73
 
74
+ ## Verbose Logging
75
+
76
+ Set `LOG_LEVEL` (e.g., `INFO`, `DEBUG`) or the convenience flag `VERBOSE=1`
77
+ before launching `ifcraftcorpus`, `ifcraftcorpus-mcp`, or the Docker image to
78
+ emit detailed logs to stderr. Example:
79
+
80
+ ```bash
81
+ LOG_LEVEL=DEBUG ifcraftcorpus-mcp
82
+
83
+ # Docker
84
+ docker run -p 8000:8000 \
85
+ -e LOG_LEVEL=DEBUG \
86
+ ghcr.io/pvliesdonk/if-craft-corpus
87
+ ```
88
+
89
+ Logs never touch stdout, so stdio transports remain compatible.
90
+
74
91
  ## Documentation
75
92
 
76
93
  Full documentation: https://pvliesdonk.github.io/if-craft-corpus
@@ -285,6 +285,70 @@ Small models may interpret as "never validate" or "always validate."
285
285
 
286
286
  ---
287
287
 
288
+ ## Sampling Parameters
289
+
290
+ Sampling parameters control the randomness and diversity of LLM outputs. The two most important are **temperature** and **top_p**. These can be set per API call, enabling different settings for different phases of a workflow.
291
+
292
+ ### Temperature
293
+
294
+ Temperature controls the probability distribution over tokens. Lower values make the model more deterministic; higher values increase randomness and creativity.
295
+
296
+ | Temperature | Effect | Use Cases |
297
+ |-------------|--------|-----------|
298
+ | 0.0–0.2 | Highly deterministic, consistent | Structured output, tool calling, factual responses |
299
+ | 0.3–0.5 | Balanced, slight variation | General conversation, summarization |
300
+ | 0.6–0.8 | More creative, diverse | Brainstorming, draft generation |
301
+ | 0.9–1.0+ | High randomness, exploratory | Creative writing, idea exploration, poetry |
302
+
303
+ **How it works:** Temperature scales the logits (pre-softmax scores) before sampling. At T=0, the model always picks the highest-probability token. At T>1, probability differences flatten, making unlikely tokens more probable.
304
+
305
+ **Caveats:**
306
+
307
+ - Even T=0 isn't fully deterministic—hardware concurrency and floating-point variations can introduce tiny differences
308
+ - High temperature increases hallucination risk
309
+ - Temperature interacts with top_p; tuning both simultaneously requires care
310
+
311
+ ### Top_p (Nucleus Sampling)
312
+
313
+ Top_p limits sampling to the smallest set of tokens whose cumulative probability exceeds p. This provides a different control over diversity than temperature.
314
+
315
+ | Top_p | Effect |
316
+ |-------|--------|
317
+ | 0.1–0.3 | Very focused, few token choices |
318
+ | 0.5–0.7 | Moderate diversity |
319
+ | 0.9–1.0 | Wide sampling, more variation |
320
+
321
+ **Temperature vs Top_p:**
322
+
323
+ - Temperature affects *all* token probabilities uniformly
324
+ - Top_p dynamically adjusts the candidate pool based on probability mass
325
+ - For most use cases, adjust one and leave the other at default
326
+ - Common pattern: low temperature (0.0–0.3) with top_p=1.0 for structured tasks
327
+
328
+ ### Provider Temperature Ranges
329
+
330
+ | Provider | Range | Default | Notes |
331
+ |----------|-------|---------|-------|
332
+ | OpenAI | 0.0–2.0 | 1.0 | Values >1.0 increase randomness significantly |
333
+ | Anthropic | 0.0–1.0 | 1.0 | Cannot exceed 1.0 |
334
+ | Gemini | 0.0–2.0 | 1.0 | Similar to OpenAI |
335
+ | Ollama | 0.0–1.0+ | 0.7–0.8 | Model-dependent defaults |
336
+
337
+ ### Phase-Specific Temperature
338
+
339
+ Since temperature can be set per API call, use different values for different workflow phases:
340
+
341
+ | Phase | Temperature | Rationale |
342
+ |-------|-------------|-----------|
343
+ | Brainstorming/Discuss | 0.7–1.0 | Encourage diverse ideas, exploration |
344
+ | Planning/Freeze | 0.3–0.5 | Balance creativity with coherence |
345
+ | Serialize/Tool calls | 0.0–0.2 | Maximize format compliance |
346
+ | Validation repair | 0.0–0.2 | Deterministic corrections |
347
+
348
+ This is particularly relevant for the **Discuss → Freeze → Serialize** pattern described below—each stage benefits from different temperature settings.
349
+
350
+ ---
351
+
288
352
  ## Structured Output Pipelines
289
353
 
290
354
  Many agent tasks end in a **strict artifact**—JSON/YAML configs, story plans, outlines—rather than free-form prose. Trying to get both *conversation* and *perfectly formatted output* from a single response is brittle, especially for small/local models.
@@ -297,21 +361,23 @@ A more reliable approach is to separate the flow into stages:
297
361
 
298
362
  ### Discuss → Freeze → Serialize
299
363
 
300
- **Discuss:** keep prompts focused on meaning, not field names. Explicitly tell the model *not* to output JSON/YAML during this phase.
364
+ **Discuss** (temperature 0.7–1.0): Keep prompts focused on meaning, not field names. Explicitly tell the model *not* to output JSON/YAML during this phase. Higher temperature encourages diverse ideas and creative exploration.
301
365
 
302
- **Freeze:** compress decisions into a short summary:
366
+ **Freeze** (temperature 0.3–0.5): Compress decisions into a short summary:
303
367
 
304
368
  - 10–30 bullets, one decision per line.
305
369
  - No open questions, only resolved choices.
306
370
  - Structured enough that a smaller model can follow it reliably.
371
+ - Moderate temperature balances coherence with flexibility.
307
372
 
308
- **Serialize:** in a separate call:
373
+ **Serialize** (temperature 0.0–0.2): In a separate call:
309
374
 
310
375
  - Provide the schema (JSON Schema, typed model, or tool definition).
311
- - Instruct: *“Output only JSON that matches this schema. No prose, no markdown fences.”*
376
+ - Instruct: *"Output only JSON that matches this schema. No prose, no markdown fences."*
312
377
  - Use constrained decoding/tool calling where available.
378
+ - Low temperature maximizes format compliance.
313
379
 
314
- This separates conversational drift from serialization, which significantly improves reliability for structured outputs like story plans, world-bible slices, or configuration objects.
380
+ This separates conversational drift from serialization, which significantly improves reliability for structured outputs like story plans, world-bible slices, or configuration objects. The temperature gradient—high for exploration, low for precision—matches each phase's purpose.
315
381
 
316
382
  ### Tool-Gated Finalization
317
383
 
@@ -363,7 +429,108 @@ When a candidate fails validation, the repair prompt should:
363
429
 
364
430
  > “Return a corrected JSON object that fixes **only** these errors. Do not change fields that are not mentioned. Output only JSON.”
365
431
 
366
- For small models, keep error descriptions compact and concrete rather than abstract (string too long: 345 > max 200).
432
+ For small models, keep error descriptions compact and concrete rather than abstract ("string too long: 345 > max 200").
433
+
434
+ ### Structured Validation Feedback
435
+
436
+ Rather than returning free-form error messages, use a structured feedback format that leverages attention patterns (status first, action last) and distinguishes error types clearly.
437
+
438
+ **Result Categories**
439
+
440
+ Use a semantic result enum rather than boolean success/failure:
441
+
442
+ | Result | Meaning | Model Action |
443
+ |--------|---------|--------------|
444
+ | `accepted` | Validation passed, artifact stored | Proceed to next step |
445
+ | `validation_failed` | Content issues the model can fix | Repair and resubmit |
446
+ | `tool_error` | Infrastructure failure | Retry unchanged or escalate |
447
+
448
+ This distinction matters: `validation_failed` tells the model its *content* was wrong (fixable), while `tool_error` indicates the tool itself failed (retry or give up).
449
+
450
+ **Error Categorization**
451
+
452
+ Group validation errors by type to help the model understand what went wrong:
453
+
454
+ ```json
455
+ {
456
+ "result": "validation_failed",
457
+ "issues": {
458
+ "invalid": [
459
+ {"field": "estimated_passages", "value": 15, "requirement": "must be 1-10"}
460
+ ],
461
+ "missing": ["protagonist_name", "setting"],
462
+ "unknown": ["passages"]
463
+ },
464
+ "issue_count": {"invalid": 1, "missing": 2, "unknown": 1},
465
+ "action": "Fix the 4 issues above and resubmit. Use exact field names from the schema."
466
+ }
467
+ ```
468
+
469
+ | Category | Meaning | Common Cause |
470
+ |----------|---------|--------------|
471
+ | `invalid` | Field present but value wrong | Constraint violation, wrong type |
472
+ | `missing` | Required field not provided | Omission, incomplete output |
473
+ | `unknown` | Field not in schema | Typo, hallucinated field name |
474
+
475
+ The `unknown` category is particularly valuable—it catches near-misses like `passages` instead of `estimated_passages` that would otherwise appear as "missing" with no hint about the typo.
476
+
477
+ **Field Ordering (Primacy/Recency)**
478
+
479
+ Structure feedback to exploit the U-shaped attention curve:
480
+
481
+ 1. **Result status** (first—immediate orientation)
482
+ 2. **Issues by category** (middle—detailed content)
483
+ 3. **Issue count** (severity summary)
484
+ 4. **Action instructions** (last—what to do next)
485
+
486
+ **What NOT to Include**
487
+
488
+ | Avoid | Why |
489
+ |-------|-----|
490
+ | Full schema | Already in tool definition; wastes tokens in retry loops |
491
+ | Boolean `success` field | Ambiguous; use semantic result categories instead |
492
+ | Generic hints | Replace with actionable, field-specific instructions |
493
+ | Valid fields | Only describe what failed, not what succeeded |
494
+
495
+ **Example: Before and After**
496
+
497
+ Anti-pattern (vague, wastes tokens):
498
+
499
+ ```
500
+ Error: Validation failed. Expected fields: type, title, protagonist_name,
501
+ setting, theme, estimated_passages, tone. Please check your submission
502
+ and ensure all required fields are present with valid values.
503
+ ```
504
+
505
+ Better (specific, actionable):
506
+
507
+ ```json
508
+ {
509
+ "result": "validation_failed",
510
+ "issues": {
511
+ "invalid": [{"field": "type", "value": "story", "requirement": "must be 'dream'"}],
512
+ "missing": ["protagonist_name"],
513
+ "unknown": ["passages"]
514
+ },
515
+ "action": "Fix these 3 issues. Did you mean 'estimated_passages' instead of 'passages'?"
516
+ }
517
+ ```
518
+
519
+ The improved version:
520
+
521
+ - Names the exact fields that failed
522
+ - Suggests the likely typo (`passages` → `estimated_passages`)
523
+ - Doesn't repeat schema information already available to the model
524
+ - Ends with a clear action instruction (primacy/recency)
525
+
526
+ ### Retry Budget and Token Efficiency
527
+
528
+ Validation loops consume tokens. Design for efficiency:
529
+
530
+ - **Cap retries**: 2-3 attempts is usually sufficient; more indicates a prompt or schema problem
531
+ - **Escalate gracefully**: After retry budget exhausted, surface a clear failure rather than looping
532
+ - **Track retry rates**: High retry rates signal opportunities for prompt improvement or schema simplification
533
+ - **Consider model capability**: Less capable models may need higher retry budgets but with simpler feedback
367
534
 
368
535
  ### Best Practices
369
536
 
@@ -528,9 +695,12 @@ Before deploying:
528
695
 
529
696
  ## Provider-Specific Optimizations
530
697
 
531
- - **Anthropic**: Use `token-efficient-tools` beta header for up to 70% output token reduction
532
- - **OpenAI**: Consider fine-tuning for frequently-used patterns
533
- - **Local models**: Tool retrieval essential—small models struggle with 10+ tools
698
+ - **Anthropic**: Use `token-efficient-tools` beta header for up to 70% output token reduction; temperature capped at 1.0
699
+ - **OpenAI**: Consider fine-tuning for frequently-used patterns; temperature range 0.0–2.0
700
+ - **Gemini**: Temperature range 0.0–2.0, similar behavior to OpenAI
701
+ - **Ollama/Local**: Tool retrieval essential—small models struggle with 10+ tools; default temperature varies by model (typically 0.7–0.8)
702
+
703
+ See [Sampling Parameters](#sampling-parameters) for detailed temperature guidance by use case.
534
704
 
535
705
  ---
536
706
 
@@ -549,6 +719,8 @@ Before deploying:
549
719
  | Dynamic few-shot | Static example bloat | Retrieve relevant examples |
550
720
  | Reflection | Quality failures | Draft → critique → refine |
551
721
  | Context pruning | Context rot | Summarize and remove stale turns |
722
+ | Structured feedback | Vague validation errors | Categorize issues (invalid/missing/unknown) |
723
+ | Phase-specific temperature | Format errors in structured output | High temp for discuss, low for serialize |
552
724
 
553
725
  | Model Class | Max Prompt | Max Tools | Strategy |
554
726
  |-------------|------------|-----------|----------|
@@ -567,6 +739,8 @@ Before deploying:
567
739
  | RAG-MCP (2025) | Two-stage selection reduces tokens 50%+, improves accuracy 3x |
568
740
  | Anthropic Token-Efficient Tools | Schema optimization reduces output tokens 70% |
569
741
  | Reflexion research | Self-correction improves quality on complex tasks |
742
+ | STROT Framework (2025) | Structured feedback loops achieve 95% first-attempt success |
743
+ | AWS Evaluator-Optimizer | Semantic reflection enables self-improving validation |
570
744
 
571
745
  ---
572
746
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "ifcraftcorpus"
3
- version = "1.1.0"
3
+ version = "1.2.1"
4
4
  description = "Interactive fiction craft corpus with search library and MCP server"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -80,6 +80,7 @@ build-backend = "hatchling.build"
80
80
  include = [
81
81
  "/src",
82
82
  "/corpus",
83
+ "/subagents",
83
84
  ]
84
85
 
85
86
  [tool.hatch.build.targets.wheel]
@@ -87,6 +88,7 @@ packages = ["src/ifcraftcorpus"]
87
88
 
88
89
  [tool.hatch.build.targets.wheel.shared-data]
89
90
  "corpus" = "share/ifcraftcorpus/corpus"
91
+ "subagents" = "share/ifcraftcorpus/subagents"
90
92
 
91
93
  [tool.ruff]
92
94
  line-length = 100
@@ -101,6 +103,17 @@ strict = true
101
103
  warn_return_any = true
102
104
  warn_unused_ignores = true
103
105
 
106
+ [[tool.mypy.overrides]]
107
+ module = [
108
+ "fastmcp",
109
+ "fastmcp.prompts",
110
+ "mcp.*",
111
+ "sentence_transformers",
112
+ "numpy",
113
+ "httpx",
114
+ ]
115
+ ignore_missing_imports = true
116
+
104
117
  [tool.pytest.ini_options]
105
118
  testpaths = ["tests"]
106
119
  addopts = "-v --tb=short"
@@ -17,8 +17,26 @@ from __future__ import annotations
17
17
 
18
18
  import argparse
19
19
  import json
20
+ import logging
20
21
  import sys
21
22
  from pathlib import Path
23
+ from typing import TYPE_CHECKING
24
+
25
+ if TYPE_CHECKING:
26
+ from ifcraftcorpus.providers import EmbeddingProvider
27
+
28
+ from ifcraftcorpus.logging_utils import configure_logging
29
+
30
+ configure_logging()
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
+ def _truncate(value: str, limit: int = 120) -> str:
35
+ """Shorten long log values to keep CLI logs readable."""
36
+
37
+ if len(value) <= limit:
38
+ return value
39
+ return f"{value[:limit]}..."
22
40
 
23
41
 
24
42
  def cmd_info(args: argparse.Namespace) -> int:
@@ -26,12 +44,19 @@ def cmd_info(args: argparse.Namespace) -> int:
26
44
  from ifcraftcorpus import Corpus, __version__
27
45
 
28
46
  corpus = Corpus()
47
+ clusters = corpus.list_clusters()
48
+ logger.info(
49
+ "CLI info command: version=%s docs=%s clusters=%s",
50
+ __version__,
51
+ corpus.document_count(),
52
+ len(clusters),
53
+ )
29
54
 
30
55
  print(f"\nIF Craft Corpus v{__version__}")
31
56
  print(f"Documents: {corpus.document_count()}")
32
- print(f"Clusters: {len(corpus.list_clusters())}")
57
+ print(f"Clusters: {len(clusters)}")
33
58
  print("\nClusters:")
34
- for cluster in corpus.list_clusters():
59
+ for cluster in clusters:
35
60
  docs = [d for d in corpus.list_documents() if d["cluster"] == cluster]
36
61
  print(f" {cluster}: {len(docs)} file(s)")
37
62
 
@@ -43,6 +68,12 @@ def cmd_search(args: argparse.Namespace) -> int:
43
68
  from ifcraftcorpus import Corpus
44
69
 
45
70
  corpus = Corpus()
71
+ logger.info(
72
+ "CLI search query=%r cluster=%s limit=%s",
73
+ _truncate(args.query),
74
+ args.cluster,
75
+ args.limit,
76
+ )
46
77
  results = corpus.search(
47
78
  args.query,
48
79
  limit=args.limit,
@@ -51,6 +82,7 @@ def cmd_search(args: argparse.Namespace) -> int:
51
82
  )
52
83
 
53
84
  if not results:
85
+ logger.info("CLI search returned no matches")
54
86
  print("No results found.")
55
87
  return 0
56
88
 
@@ -65,6 +97,7 @@ def cmd_search(args: argparse.Namespace) -> int:
65
97
  content += "..."
66
98
  print(f" {content}")
67
99
 
100
+ logger.info("CLI search returned %s results", len(results))
68
101
  return 0
69
102
 
70
103
 
@@ -77,6 +110,7 @@ def cmd_embeddings_status(args: argparse.Namespace) -> int:
77
110
  get_embedding_provider,
78
111
  )
79
112
 
113
+ logger.debug("CLI embeddings status requested")
80
114
  print("\n=== Embedding Providers ===\n")
81
115
 
82
116
  # Check each provider
@@ -129,7 +163,7 @@ def cmd_embeddings_build(args: argparse.Namespace) -> int:
129
163
  )
130
164
 
131
165
  # Get provider
132
- provider = None
166
+ provider: EmbeddingProvider | None = None
133
167
  if args.provider:
134
168
  if args.provider == "ollama":
135
169
  provider = OllamaEmbeddings(model=args.model, host=args.ollama_host)
@@ -152,12 +186,19 @@ def cmd_embeddings_build(args: argparse.Namespace) -> int:
152
186
  print(f"Provider {provider.provider_name} is not available.", file=sys.stderr)
153
187
  return 1
154
188
 
189
+ logger.info(
190
+ "CLI embeddings build provider=%s model=%s output=%s",
191
+ provider.provider_name,
192
+ provider.model,
193
+ args.output,
194
+ )
155
195
  print(f"Using provider: {provider.provider_name}")
156
196
  print(f"Model: {provider.model} ({provider.dimension}d)")
157
197
 
158
198
  # Build embeddings
159
199
  corpus = Corpus()
160
- print(f"\nBuilding embeddings for {corpus.document_count()} documents...")
200
+ doc_total = corpus.document_count()
201
+ print(f"\nBuilding embeddings for {doc_total} documents...")
161
202
 
162
203
  # Use the corpus's internal index
163
204
  embedding_index = EmbeddingIndex(provider=provider)
@@ -214,6 +255,12 @@ def cmd_embeddings_build(args: argparse.Namespace) -> int:
214
255
  output_path = Path(args.output)
215
256
  embedding_index.save(output_path)
216
257
 
258
+ logger.info(
259
+ "CLI embeddings build completed docs=%s sections=%s output=%s",
260
+ doc_count,
261
+ section_count,
262
+ output_path,
263
+ )
217
264
  print(f"\nDone! Embedded {section_count} sections from {doc_count} documents.")
218
265
  print(f"Saved to: {output_path}")
219
266
 
@@ -273,7 +320,9 @@ def main() -> int:
273
320
  emb_parser.print_help()
274
321
  return 0
275
322
 
276
- return args.func(args)
323
+ logger.debug("CLI command executed: %s", args.command)
324
+ result: int = args.func(args)
325
+ return result
277
326
 
278
327
 
279
328
  if __name__ == "__main__":
@@ -44,10 +44,13 @@ from __future__ import annotations
44
44
  import json
45
45
  import logging
46
46
  from pathlib import Path
47
- from typing import TYPE_CHECKING
47
+ from typing import TYPE_CHECKING, Any
48
48
 
49
49
  import numpy as np
50
50
 
51
+ if TYPE_CHECKING:
52
+ from sentence_transformers import SentenceTransformer
53
+
51
54
  if TYPE_CHECKING:
52
55
  from ifcraftcorpus.index import CorpusIndex
53
56
  from ifcraftcorpus.providers import EmbeddingProvider
@@ -107,7 +110,8 @@ class EmbeddingIndex:
107
110
  """
108
111
  self._provider = provider
109
112
  self._embeddings: np.ndarray | None = None
110
- self._metadata: list[dict] = []
113
+ self._metadata: list[dict[str, Any]] = []
114
+ self._st_model: SentenceTransformer | None = None
111
115
 
112
116
  # For backward compatibility / persistence
113
117
  if provider:
@@ -117,7 +121,6 @@ class EmbeddingIndex:
117
121
  self.model_name = model_name
118
122
  self._provider_name = "sentence-transformers"
119
123
  # Lazy-load sentence-transformers model
120
- self._st_model = None
121
124
  if not lazy_load:
122
125
  self._load_st_model()
123
126
 
@@ -126,7 +129,7 @@ class EmbeddingIndex:
126
129
  """Get the provider name."""
127
130
  return self._provider_name
128
131
 
129
- def _load_st_model(self):
132
+ def _load_st_model(self) -> SentenceTransformer:
130
133
  """Load sentence-transformers model (fallback)."""
131
134
  if self._st_model is None:
132
135
  try:
@@ -148,12 +151,13 @@ class EmbeddingIndex:
148
151
  else:
149
152
  # Fallback to sentence-transformers
150
153
  model = self._load_st_model()
151
- return model.encode(texts, show_progress_bar=False, convert_to_numpy=True)
154
+ embeddings = model.encode(texts, show_progress_bar=False, convert_to_numpy=True)
155
+ return np.asarray(embeddings)
152
156
 
153
157
  def add_texts(
154
158
  self,
155
159
  texts: list[str],
156
- metadata: list[dict],
160
+ metadata: list[dict[str, Any]],
157
161
  ) -> None:
158
162
  """Add texts with metadata to the index.
159
163
 
@@ -185,7 +189,7 @@ class EmbeddingIndex:
185
189
  *,
186
190
  top_k: int = 10,
187
191
  cluster: str | None = None,
188
- ) -> list[tuple[dict, float]]:
192
+ ) -> list[tuple[dict[str, Any], float]]:
189
193
  """Search for semantically similar texts.
190
194
 
191
195
  Args:
@@ -48,10 +48,31 @@ from __future__ import annotations
48
48
  import sqlite3
49
49
  from dataclasses import dataclass
50
50
  from pathlib import Path
51
+ from typing import Any
51
52
 
52
53
  from ifcraftcorpus.parser import Document, parse_directory
53
54
 
54
55
 
56
+ def _sanitize_fts_query(query: str) -> str:
57
+ """Sanitize a query string for the FTS5 MATCH clause.
58
+
59
+ This function replaces hyphens with spaces to prevent FTS5 from
60
+ interpreting them as the `NOT` operator. This is intended to correctly
61
+ handle natural language queries with hyphenated words, for example
62
+ transforming "haunted-house" into a search for "haunted house".
63
+
64
+ It also collapses any resulting multiple spaces into a single space.
65
+
66
+ Args:
67
+ query: Raw query string from user input.
68
+
69
+ Returns:
70
+ Sanitized query safe for FTS5 MATCH.
71
+ """
72
+ # Replace hyphens and collapse whitespace in one go.
73
+ return " ".join(query.replace("-", " ").split())
74
+
75
+
55
76
  @dataclass
56
77
  class SearchResult:
57
78
  """A search result from the corpus FTS5 index.
@@ -379,8 +400,8 @@ class CorpusIndex:
379
400
  ... cluster="emotional-design",
380
401
  ... limit=5)
381
402
  """
382
- # Build FTS5 query
383
- fts_query = query
403
+ # Build FTS5 query - sanitize to handle special characters
404
+ fts_query = _sanitize_fts_query(query)
384
405
 
385
406
  # Add cluster filter if specified
386
407
  where_clause = ""
@@ -462,7 +483,7 @@ class CorpusIndex:
462
483
  cursor = self.conn.execute("SELECT DISTINCT cluster FROM documents ORDER BY cluster")
463
484
  return [row["cluster"] for row in cursor]
464
485
 
465
- def get_document(self, name: str) -> dict | None:
486
+ def get_document(self, name: str) -> dict[str, Any] | None:
466
487
  """Get a document by name with all its sections.
467
488
 
468
489
  Retrieves complete document data including metadata and all
@@ -535,7 +556,8 @@ class CorpusIndex:
535
556
  Count of documents in the index.
536
557
  """
537
558
  cursor = self.conn.execute("SELECT COUNT(*) FROM documents")
538
- return cursor.fetchone()[0]
559
+ result = cursor.fetchone()
560
+ return int(result[0]) if result else 0
539
561
 
540
562
 
541
563
  def build_index(corpus_dir: Path, output_path: Path) -> CorpusIndex:
@@ -0,0 +1,84 @@
1
+ """Shared logging helpers for the IF Craft Corpus codebase."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import os
7
+ import sys
8
+ from typing import Final
9
+
10
+ LOG_LEVEL_ENV: Final[str] = "LOG_LEVEL"
11
+ VERBOSE_ENV: Final[str] = "VERBOSE"
12
+
13
+ __all__ = ["configure_logging", "LOG_LEVEL_ENV", "VERBOSE_ENV"]
14
+
15
+ _TRUTHY_VALUES: Final[set[str]] = {"1", "true", "yes", "on"}
16
+ _configured: bool = False
17
+ _CHATTY_LOGGERS: Final[tuple[str, ...]] = (
18
+ "httpx",
19
+ "fakeredis",
20
+ "docket",
21
+ )
22
+
23
+
24
def _is_truthy(value: str | None) -> bool:
    """Report whether *value* spells an enabled flag (per ``_TRUTHY_VALUES``)."""

    # Unset environment variables arrive as None and are never truthy;
    # otherwise compare the trimmed, lowercased text against the flag set.
    return value is not None and value.strip().lower() in _TRUTHY_VALUES
30
+
31
+
32
+ def _resolve_level(value: str | None) -> int | None:
33
+ """Convert a logging level string (name or integer) to ``int``."""
34
+
35
+ if not value:
36
+ return None
37
+ candidate = value.strip()
38
+ if not candidate:
39
+ return None
40
+ if candidate.isdigit():
41
+ return int(candidate)
42
+ name = candidate.upper()
43
+ return getattr(logging, name, None)
44
+
45
+
46
def configure_logging(
    *,
    env_level: str = LOG_LEVEL_ENV,
    env_verbose: str = VERBOSE_ENV,
    fmt: str = "%(asctime)s [%(levelname)s] %(name)s: %(message)s",
) -> int | None:
    """Configure root logging when LOG_LEVEL/VERBOSE are set.

    Returns the configured level when logging is enabled, ``None`` otherwise.
    """

    global _configured

    level_text = os.getenv(env_level)
    resolved = _resolve_level(level_text)

    # An explicitly-set but unrecognized LOG_LEVEL falls back to INFO, with
    # a warning printed to stderr (stdout is left untouched so stdio-based
    # transports stay clean).
    if level_text and resolved is None:
        print(
            f"ifcraftcorpus: unknown log level '{level_text}', defaulting to INFO",
            file=sys.stderr,
        )
        resolved = logging.INFO

    if resolved is None:
        # No usable LOG_LEVEL: VERBOSE alone enables DEBUG; otherwise do
        # nothing and leave logging unconfigured.
        if not _is_truthy(os.getenv(env_verbose)):
            return None
        resolved = logging.DEBUG

    root_logger = logging.getLogger()
    # Install the stderr handler at most once per process; later calls only
    # adjust the root level, keeping this function idempotent.
    if not (root_logger.handlers and _configured):
        logging.basicConfig(level=resolved, format=fmt, stream=sys.stderr)
        _configured = True
    root_logger.setLevel(resolved)

    # Known noisy third-party loggers are pinned to WARNING or above so a
    # DEBUG run isn't flooded by their chatter.
    for chatty_name in _CHATTY_LOGGERS:
        logging.getLogger(chatty_name).setLevel(max(logging.WARNING, resolved))
    return resolved