ifcraftcorpus 1.1.0__tar.gz → 1.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/PKG-INFO +18 -1
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/README.md +17 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/agent-design/agent_prompt_engineering.md +183 -9
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/pyproject.toml +14 -1
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/src/ifcraftcorpus/cli.py +54 -5
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/src/ifcraftcorpus/embeddings.py +11 -7
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/src/ifcraftcorpus/index.py +26 -4
- ifcraftcorpus-1.2.1/src/ifcraftcorpus/logging_utils.py +84 -0
- ifcraftcorpus-1.2.1/src/ifcraftcorpus/mcp_server.py +806 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/src/ifcraftcorpus/providers.py +4 -4
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/src/ifcraftcorpus/search.py +60 -12
- ifcraftcorpus-1.2.1/subagents/README.md +198 -0
- ifcraftcorpus-1.2.1/subagents/if_genre_consultant.md +257 -0
- ifcraftcorpus-1.2.1/subagents/if_platform_advisor.md +306 -0
- ifcraftcorpus-1.2.1/subagents/if_prose_writer.md +187 -0
- ifcraftcorpus-1.2.1/subagents/if_quality_reviewer.md +245 -0
- ifcraftcorpus-1.2.1/subagents/if_story_architect.md +162 -0
- ifcraftcorpus-1.2.1/subagents/if_world_curator.md +280 -0
- ifcraftcorpus-1.1.0/src/ifcraftcorpus/mcp_server.py +0 -410
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/.gitignore +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/LICENSE +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/LICENSE-CONTENT +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/agent-design/multi_agent_patterns.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/audience-and-access/accessibility_guidelines.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/audience-and-access/audience_targeting.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/audience-and-access/localization_considerations.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/audio_visual_integration.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/collaborative_if_writing.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/creative_workflow_pipeline.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/diegetic_design.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/idea_capture_and_hooks.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/if_platform_tools.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/player_analytics_metrics.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/quality_standards_if.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/research_and_verification.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/testing_interactive_fiction.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/emotional-design/conflict_patterns.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/emotional-design/emotional_beats.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/game-design/mechanics_design_patterns.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/genre-conventions/children_and_ya_conventions.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/genre-conventions/fantasy_conventions.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/genre-conventions/historical_fiction.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/genre-conventions/horror_conventions.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/genre-conventions/mystery_conventions.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/genre-conventions/sci_fi_conventions.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/branching_narrative_construction.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/branching_narrative_craft.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/endings_patterns.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/episodic_serialized_if.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/nonlinear_structure.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/pacing_and_tension.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/romance_and_relationships.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/scene_structure_and_beats.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/scene_transitions.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/prose-and-language/character_voice.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/prose-and-language/dialogue_craft.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/prose-and-language/exposition_techniques.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/prose-and-language/narrative_point_of_view.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/prose-and-language/prose_patterns.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/prose-and-language/subtext_and_implication.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/prose-and-language/voice_register_consistency.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/scope-and-planning/scope_and_length.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/world-and-setting/canon_management.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/world-and-setting/setting_as_character.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/corpus/world-and-setting/worldbuilding_patterns.md +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/src/ifcraftcorpus/__init__.py +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/src/ifcraftcorpus/parser.py +0 -0
- {ifcraftcorpus-1.1.0 → ifcraftcorpus-1.2.1}/src/ifcraftcorpus/py.typed +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ifcraftcorpus
|
|
3
|
-
Version: 1.1
|
|
3
|
+
Version: 1.2.1
|
|
4
4
|
Summary: Interactive fiction craft corpus with search library and MCP server
|
|
5
5
|
Project-URL: Homepage, https://pvliesdonk.github.io/if-craft-corpus
|
|
6
6
|
Project-URL: Repository, https://github.com/pvliesdonk/if-craft-corpus
|
|
@@ -124,6 +124,23 @@ results = corpus.search(
|
|
|
124
124
|
| agent-design | 2 | Multi-agent patterns, prompt engineering |
|
|
125
125
|
| game-design | 1 | Mechanics design patterns |
|
|
126
126
|
|
|
127
|
+
## Verbose Logging
|
|
128
|
+
|
|
129
|
+
Set `LOG_LEVEL` (e.g., `INFO`, `DEBUG`) or the convenience flag `VERBOSE=1`
|
|
130
|
+
before launching `ifcraftcorpus`, `ifcraftcorpus-mcp`, or the Docker image to
|
|
131
|
+
emit detailed logs to stderr. Example:
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
LOG_LEVEL=DEBUG ifcraftcorpus-mcp
|
|
135
|
+
|
|
136
|
+
# Docker
|
|
137
|
+
docker run -p 8000:8000 \
|
|
138
|
+
-e LOG_LEVEL=DEBUG \
|
|
139
|
+
ghcr.io/pvliesdonk/if-craft-corpus
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
Logs never touch stdout, so stdio transports remain compatible.
|
|
143
|
+
|
|
127
144
|
## Documentation
|
|
128
145
|
|
|
129
146
|
Full documentation: https://pvliesdonk.github.io/if-craft-corpus
|
|
@@ -71,6 +71,23 @@ results = corpus.search(
|
|
|
71
71
|
| agent-design | 2 | Multi-agent patterns, prompt engineering |
|
|
72
72
|
| game-design | 1 | Mechanics design patterns |
|
|
73
73
|
|
|
74
|
+
## Verbose Logging
|
|
75
|
+
|
|
76
|
+
Set `LOG_LEVEL` (e.g., `INFO`, `DEBUG`) or the convenience flag `VERBOSE=1`
|
|
77
|
+
before launching `ifcraftcorpus`, `ifcraftcorpus-mcp`, or the Docker image to
|
|
78
|
+
emit detailed logs to stderr. Example:
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
LOG_LEVEL=DEBUG ifcraftcorpus-mcp
|
|
82
|
+
|
|
83
|
+
# Docker
|
|
84
|
+
docker run -p 8000:8000 \
|
|
85
|
+
-e LOG_LEVEL=DEBUG \
|
|
86
|
+
ghcr.io/pvliesdonk/if-craft-corpus
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Logs never touch stdout, so stdio transports remain compatible.
|
|
90
|
+
|
|
74
91
|
## Documentation
|
|
75
92
|
|
|
76
93
|
Full documentation: https://pvliesdonk.github.io/if-craft-corpus
|
|
@@ -285,6 +285,70 @@ Small models may interpret as "never validate" or "always validate."
|
|
|
285
285
|
|
|
286
286
|
---
|
|
287
287
|
|
|
288
|
+
## Sampling Parameters
|
|
289
|
+
|
|
290
|
+
Sampling parameters control the randomness and diversity of LLM outputs. The two most important are **temperature** and **top_p**. These can be set per API call, enabling different settings for different phases of a workflow.
|
|
291
|
+
|
|
292
|
+
### Temperature
|
|
293
|
+
|
|
294
|
+
Temperature controls the probability distribution over tokens. Lower values make the model more deterministic; higher values increase randomness and creativity.
|
|
295
|
+
|
|
296
|
+
| Temperature | Effect | Use Cases |
|
|
297
|
+
|-------------|--------|-----------|
|
|
298
|
+
| 0.0–0.2 | Highly deterministic, consistent | Structured output, tool calling, factual responses |
|
|
299
|
+
| 0.3–0.5 | Balanced, slight variation | General conversation, summarization |
|
|
300
|
+
| 0.6–0.8 | More creative, diverse | Brainstorming, draft generation |
|
|
301
|
+
| 0.9–1.0+ | High randomness, exploratory | Creative writing, idea exploration, poetry |
|
|
302
|
+
|
|
303
|
+
**How it works:** Temperature scales the logits (pre-softmax scores) before sampling. At T=0, the model always picks the highest-probability token. At T>1, probability differences flatten, making unlikely tokens more probable.
|
|
304
|
+
|
|
305
|
+
**Caveats:**
|
|
306
|
+
|
|
307
|
+
- Even T=0 isn't fully deterministic—hardware concurrency and floating-point variations can introduce tiny differences
|
|
308
|
+
- High temperature increases hallucination risk
|
|
309
|
+
- Temperature interacts with top_p; tuning both simultaneously requires care
|
|
310
|
+
|
|
311
|
+
### Top_p (Nucleus Sampling)
|
|
312
|
+
|
|
313
|
+
Top_p limits sampling to the smallest set of tokens whose cumulative probability exceeds p. This provides a different control over diversity than temperature.
|
|
314
|
+
|
|
315
|
+
| Top_p | Effect |
|
|
316
|
+
|-------|--------|
|
|
317
|
+
| 0.1–0.3 | Very focused, few token choices |
|
|
318
|
+
| 0.5–0.7 | Moderate diversity |
|
|
319
|
+
| 0.9–1.0 | Wide sampling, more variation |
|
|
320
|
+
|
|
321
|
+
**Temperature vs Top_p:**
|
|
322
|
+
|
|
323
|
+
- Temperature affects *all* token probabilities uniformly
|
|
324
|
+
- Top_p dynamically adjusts the candidate pool based on probability mass
|
|
325
|
+
- For most use cases, adjust one and leave the other at default
|
|
326
|
+
- Common pattern: low temperature (0.0–0.3) with top_p=1.0 for structured tasks
|
|
327
|
+
|
|
328
|
+
### Provider Temperature Ranges
|
|
329
|
+
|
|
330
|
+
| Provider | Range | Default | Notes |
|
|
331
|
+
|----------|-------|---------|-------|
|
|
332
|
+
| OpenAI | 0.0–2.0 | 1.0 | Values >1.0 increase randomness significantly |
|
|
333
|
+
| Anthropic | 0.0–1.0 | 1.0 | Cannot exceed 1.0 |
|
|
334
|
+
| Gemini | 0.0–2.0 | 1.0 | Similar to OpenAI |
|
|
335
|
+
| Ollama | 0.0–1.0+ | 0.7–0.8 | Model-dependent defaults |
|
|
336
|
+
|
|
337
|
+
### Phase-Specific Temperature
|
|
338
|
+
|
|
339
|
+
Since temperature can be set per API call, use different values for different workflow phases:
|
|
340
|
+
|
|
341
|
+
| Phase | Temperature | Rationale |
|
|
342
|
+
|-------|-------------|-----------|
|
|
343
|
+
| Brainstorming/Discuss | 0.7–1.0 | Encourage diverse ideas, exploration |
|
|
344
|
+
| Planning/Freeze | 0.3–0.5 | Balance creativity with coherence |
|
|
345
|
+
| Serialize/Tool calls | 0.0–0.2 | Maximize format compliance |
|
|
346
|
+
| Validation repair | 0.0–0.2 | Deterministic corrections |
|
|
347
|
+
|
|
348
|
+
This is particularly relevant for the **Discuss → Freeze → Serialize** pattern described below—each stage benefits from different temperature settings.
|
|
349
|
+
|
|
350
|
+
---
|
|
351
|
+
|
|
288
352
|
## Structured Output Pipelines
|
|
289
353
|
|
|
290
354
|
Many agent tasks end in a **strict artifact**—JSON/YAML configs, story plans, outlines—rather than free-form prose. Trying to get both *conversation* and *perfectly formatted output* from a single response is brittle, especially for small/local models.
|
|
@@ -297,21 +361,23 @@ A more reliable approach is to separate the flow into stages:
|
|
|
297
361
|
|
|
298
362
|
### Discuss → Freeze → Serialize
|
|
299
363
|
|
|
300
|
-
**Discuss
|
|
364
|
+
**Discuss** (temperature 0.7–1.0): Keep prompts focused on meaning, not field names. Explicitly tell the model *not* to output JSON/YAML during this phase. Higher temperature encourages diverse ideas and creative exploration.
|
|
301
365
|
|
|
302
|
-
**Freeze
|
|
366
|
+
**Freeze** (temperature 0.3–0.5): Compress decisions into a short summary:
|
|
303
367
|
|
|
304
368
|
- 10–30 bullets, one decision per line.
|
|
305
369
|
- No open questions, only resolved choices.
|
|
306
370
|
- Structured enough that a smaller model can follow it reliably.
|
|
371
|
+
- Moderate temperature balances coherence with flexibility.
|
|
307
372
|
|
|
308
|
-
**Serialize
|
|
373
|
+
**Serialize** (temperature 0.0–0.2): In a separate call:
|
|
309
374
|
|
|
310
375
|
- Provide the schema (JSON Schema, typed model, or tool definition).
|
|
311
|
-
- Instruct:
|
|
376
|
+
- Instruct: *"Output only JSON that matches this schema. No prose, no markdown fences."*
|
|
312
377
|
- Use constrained decoding/tool calling where available.
|
|
378
|
+
- Low temperature maximizes format compliance.
|
|
313
379
|
|
|
314
|
-
This separates conversational drift from serialization, which significantly improves reliability for structured outputs like story plans, world-bible slices, or configuration objects.
|
|
380
|
+
This separates conversational drift from serialization, which significantly improves reliability for structured outputs like story plans, world-bible slices, or configuration objects. The temperature gradient—high for exploration, low for precision—matches each phase's purpose.
|
|
315
381
|
|
|
316
382
|
### Tool-Gated Finalization
|
|
317
383
|
|
|
@@ -363,7 +429,108 @@ When a candidate fails validation, the repair prompt should:
|
|
|
363
429
|
|
|
364
430
|
> “Return a corrected JSON object that fixes **only** these errors. Do not change fields that are not mentioned. Output only JSON.”
|
|
365
431
|
|
|
366
|
-
For small models, keep error descriptions compact and concrete rather than abstract (
|
|
432
|
+
For small models, keep error descriptions compact and concrete rather than abstract ("string too long: 345 > max 200").
|
|
433
|
+
|
|
434
|
+
### Structured Validation Feedback
|
|
435
|
+
|
|
436
|
+
Rather than returning free-form error messages, use a structured feedback format that leverages attention patterns (status first, action last) and distinguishes error types clearly.
|
|
437
|
+
|
|
438
|
+
**Result Categories**
|
|
439
|
+
|
|
440
|
+
Use a semantic result enum rather than boolean success/failure:
|
|
441
|
+
|
|
442
|
+
| Result | Meaning | Model Action |
|
|
443
|
+
|--------|---------|--------------|
|
|
444
|
+
| `accepted` | Validation passed, artifact stored | Proceed to next step |
|
|
445
|
+
| `validation_failed` | Content issues the model can fix | Repair and resubmit |
|
|
446
|
+
| `tool_error` | Infrastructure failure | Retry unchanged or escalate |
|
|
447
|
+
|
|
448
|
+
This distinction matters: `validation_failed` tells the model its *content* was wrong (fixable), while `tool_error` indicates the tool itself failed (retry or give up).
|
|
449
|
+
|
|
450
|
+
**Error Categorization**
|
|
451
|
+
|
|
452
|
+
Group validation errors by type to help the model understand what went wrong:
|
|
453
|
+
|
|
454
|
+
```json
|
|
455
|
+
{
|
|
456
|
+
"result": "validation_failed",
|
|
457
|
+
"issues": {
|
|
458
|
+
"invalid": [
|
|
459
|
+
{"field": "estimated_passages", "value": 15, "requirement": "must be 1-10"}
|
|
460
|
+
],
|
|
461
|
+
"missing": ["protagonist_name", "setting"],
|
|
462
|
+
"unknown": ["passages"]
|
|
463
|
+
},
|
|
464
|
+
"issue_count": {"invalid": 1, "missing": 2, "unknown": 1},
|
|
465
|
+
"action": "Fix the 4 issues above and resubmit. Use exact field names from the schema."
|
|
466
|
+
}
|
|
467
|
+
```
|
|
468
|
+
|
|
469
|
+
| Category | Meaning | Common Cause |
|
|
470
|
+
|----------|---------|--------------|
|
|
471
|
+
| `invalid` | Field present but value wrong | Constraint violation, wrong type |
|
|
472
|
+
| `missing` | Required field not provided | Omission, incomplete output |
|
|
473
|
+
| `unknown` | Field not in schema | Typo, hallucinated field name |
|
|
474
|
+
|
|
475
|
+
The `unknown` category is particularly valuable—it catches near-misses like `passages` instead of `estimated_passages` that would otherwise appear as "missing" with no hint about the typo.
|
|
476
|
+
|
|
477
|
+
**Field Ordering (Primacy/Recency)**
|
|
478
|
+
|
|
479
|
+
Structure feedback to exploit the U-shaped attention curve:
|
|
480
|
+
|
|
481
|
+
1. **Result status** (first—immediate orientation)
|
|
482
|
+
2. **Issues by category** (middle—detailed content)
|
|
483
|
+
3. **Issue count** (severity summary)
|
|
484
|
+
4. **Action instructions** (last—what to do next)
|
|
485
|
+
|
|
486
|
+
**What NOT to Include**
|
|
487
|
+
|
|
488
|
+
| Avoid | Why |
|
|
489
|
+
|-------|-----|
|
|
490
|
+
| Full schema | Already in tool definition; wastes tokens in retry loops |
|
|
491
|
+
| Boolean `success` field | Ambiguous; use semantic result categories instead |
|
|
492
|
+
| Generic hints | Replace with actionable, field-specific instructions |
|
|
493
|
+
| Valid fields | Only describe what failed, not what succeeded |
|
|
494
|
+
|
|
495
|
+
**Example: Before and After**
|
|
496
|
+
|
|
497
|
+
Anti-pattern (vague, wastes tokens):
|
|
498
|
+
|
|
499
|
+
```
|
|
500
|
+
Error: Validation failed. Expected fields: type, title, protagonist_name,
|
|
501
|
+
setting, theme, estimated_passages, tone. Please check your submission
|
|
502
|
+
and ensure all required fields are present with valid values.
|
|
503
|
+
```
|
|
504
|
+
|
|
505
|
+
Better (specific, actionable):
|
|
506
|
+
|
|
507
|
+
```json
|
|
508
|
+
{
|
|
509
|
+
"result": "validation_failed",
|
|
510
|
+
"issues": {
|
|
511
|
+
"invalid": [{"field": "type", "value": "story", "requirement": "must be 'dream'"}],
|
|
512
|
+
"missing": ["protagonist_name"],
|
|
513
|
+
"unknown": ["passages"]
|
|
514
|
+
},
|
|
515
|
+
"action": "Fix these 3 issues. Did you mean 'estimated_passages' instead of 'passages'?"
|
|
516
|
+
}
|
|
517
|
+
```
|
|
518
|
+
|
|
519
|
+
The improved version:
|
|
520
|
+
|
|
521
|
+
- Names the exact fields that failed
|
|
522
|
+
- Suggests the likely typo (`passages` → `estimated_passages`)
|
|
523
|
+
- Doesn't repeat schema information already available to the model
|
|
524
|
+
- Ends with a clear action instruction (primacy/recency)
|
|
525
|
+
|
|
526
|
+
### Retry Budget and Token Efficiency
|
|
527
|
+
|
|
528
|
+
Validation loops consume tokens. Design for efficiency:
|
|
529
|
+
|
|
530
|
+
- **Cap retries**: 2-3 attempts is usually sufficient; more indicates a prompt or schema problem
|
|
531
|
+
- **Escalate gracefully**: After retry budget exhausted, surface a clear failure rather than looping
|
|
532
|
+
- **Track retry rates**: High retry rates signal opportunities for prompt improvement or schema simplification
|
|
533
|
+
- **Consider model capability**: Less capable models may need higher retry budgets but with simpler feedback
|
|
367
534
|
|
|
368
535
|
### Best Practices
|
|
369
536
|
|
|
@@ -528,9 +695,12 @@ Before deploying:
|
|
|
528
695
|
|
|
529
696
|
## Provider-Specific Optimizations
|
|
530
697
|
|
|
531
|
-
- **Anthropic**: Use `token-efficient-tools` beta header for up to 70% output token reduction
|
|
532
|
-
- **OpenAI**: Consider fine-tuning for frequently-used patterns
|
|
533
|
-
- **
|
|
698
|
+
- **Anthropic**: Use `token-efficient-tools` beta header for up to 70% output token reduction; temperature capped at 1.0
|
|
699
|
+
- **OpenAI**: Consider fine-tuning for frequently-used patterns; temperature range 0.0–2.0
|
|
700
|
+
- **Gemini**: Temperature range 0.0–2.0, similar behavior to OpenAI
|
|
701
|
+
- **Ollama/Local**: Tool retrieval essential—small models struggle with 10+ tools; default temperature varies by model (typically 0.7–0.8)
|
|
702
|
+
|
|
703
|
+
See [Sampling Parameters](#sampling-parameters) for detailed temperature guidance by use case.
|
|
534
704
|
|
|
535
705
|
---
|
|
536
706
|
|
|
@@ -549,6 +719,8 @@ Before deploying:
|
|
|
549
719
|
| Dynamic few-shot | Static example bloat | Retrieve relevant examples |
|
|
550
720
|
| Reflection | Quality failures | Draft → critique → refine |
|
|
551
721
|
| Context pruning | Context rot | Summarize and remove stale turns |
|
|
722
|
+
| Structured feedback | Vague validation errors | Categorize issues (invalid/missing/unknown) |
|
|
723
|
+
| Phase-specific temperature | Format errors in structured output | High temp for discuss, low for serialize |
|
|
552
724
|
|
|
553
725
|
| Model Class | Max Prompt | Max Tools | Strategy |
|
|
554
726
|
|-------------|------------|-----------|----------|
|
|
@@ -567,6 +739,8 @@ Before deploying:
|
|
|
567
739
|
| RAG-MCP (2025) | Two-stage selection reduces tokens 50%+, improves accuracy 3x |
|
|
568
740
|
| Anthropic Token-Efficient Tools | Schema optimization reduces output tokens 70% |
|
|
569
741
|
| Reflexion research | Self-correction improves quality on complex tasks |
|
|
742
|
+
| STROT Framework (2025) | Structured feedback loops achieve 95% first-attempt success |
|
|
743
|
+
| AWS Evaluator-Optimizer | Semantic reflection enables self-improving validation |
|
|
570
744
|
|
|
571
745
|
---
|
|
572
746
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "ifcraftcorpus"
|
|
3
|
-
version = "1.1
|
|
3
|
+
version = "1.2.1"
|
|
4
4
|
description = "Interactive fiction craft corpus with search library and MCP server"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license = {text = "MIT"}
|
|
@@ -80,6 +80,7 @@ build-backend = "hatchling.build"
|
|
|
80
80
|
include = [
|
|
81
81
|
"/src",
|
|
82
82
|
"/corpus",
|
|
83
|
+
"/subagents",
|
|
83
84
|
]
|
|
84
85
|
|
|
85
86
|
[tool.hatch.build.targets.wheel]
|
|
@@ -87,6 +88,7 @@ packages = ["src/ifcraftcorpus"]
|
|
|
87
88
|
|
|
88
89
|
[tool.hatch.build.targets.wheel.shared-data]
|
|
89
90
|
"corpus" = "share/ifcraftcorpus/corpus"
|
|
91
|
+
"subagents" = "share/ifcraftcorpus/subagents"
|
|
90
92
|
|
|
91
93
|
[tool.ruff]
|
|
92
94
|
line-length = 100
|
|
@@ -101,6 +103,17 @@ strict = true
|
|
|
101
103
|
warn_return_any = true
|
|
102
104
|
warn_unused_ignores = true
|
|
103
105
|
|
|
106
|
+
[[tool.mypy.overrides]]
|
|
107
|
+
module = [
|
|
108
|
+
"fastmcp",
|
|
109
|
+
"fastmcp.prompts",
|
|
110
|
+
"mcp.*",
|
|
111
|
+
"sentence_transformers",
|
|
112
|
+
"numpy",
|
|
113
|
+
"httpx",
|
|
114
|
+
]
|
|
115
|
+
ignore_missing_imports = true
|
|
116
|
+
|
|
104
117
|
[tool.pytest.ini_options]
|
|
105
118
|
testpaths = ["tests"]
|
|
106
119
|
addopts = "-v --tb=short"
|
|
@@ -17,8 +17,26 @@ from __future__ import annotations
|
|
|
17
17
|
|
|
18
18
|
import argparse
|
|
19
19
|
import json
|
|
20
|
+
import logging
|
|
20
21
|
import sys
|
|
21
22
|
from pathlib import Path
|
|
23
|
+
from typing import TYPE_CHECKING
|
|
24
|
+
|
|
25
|
+
if TYPE_CHECKING:
|
|
26
|
+
from ifcraftcorpus.providers import EmbeddingProvider
|
|
27
|
+
|
|
28
|
+
from ifcraftcorpus.logging_utils import configure_logging
|
|
29
|
+
|
|
30
|
+
configure_logging()
|
|
31
|
+
logger = logging.getLogger(__name__)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _truncate(value: str, limit: int = 120) -> str:
|
|
35
|
+
"""Shorten long log values to keep CLI logs readable."""
|
|
36
|
+
|
|
37
|
+
if len(value) <= limit:
|
|
38
|
+
return value
|
|
39
|
+
return f"{value[:limit]}..."
|
|
22
40
|
|
|
23
41
|
|
|
24
42
|
def cmd_info(args: argparse.Namespace) -> int:
|
|
@@ -26,12 +44,19 @@ def cmd_info(args: argparse.Namespace) -> int:
|
|
|
26
44
|
from ifcraftcorpus import Corpus, __version__
|
|
27
45
|
|
|
28
46
|
corpus = Corpus()
|
|
47
|
+
clusters = corpus.list_clusters()
|
|
48
|
+
logger.info(
|
|
49
|
+
"CLI info command: version=%s docs=%s clusters=%s",
|
|
50
|
+
__version__,
|
|
51
|
+
corpus.document_count(),
|
|
52
|
+
len(clusters),
|
|
53
|
+
)
|
|
29
54
|
|
|
30
55
|
print(f"\nIF Craft Corpus v{__version__}")
|
|
31
56
|
print(f"Documents: {corpus.document_count()}")
|
|
32
|
-
print(f"Clusters: {len(
|
|
57
|
+
print(f"Clusters: {len(clusters)}")
|
|
33
58
|
print("\nClusters:")
|
|
34
|
-
for cluster in
|
|
59
|
+
for cluster in clusters:
|
|
35
60
|
docs = [d for d in corpus.list_documents() if d["cluster"] == cluster]
|
|
36
61
|
print(f" {cluster}: {len(docs)} file(s)")
|
|
37
62
|
|
|
@@ -43,6 +68,12 @@ def cmd_search(args: argparse.Namespace) -> int:
|
|
|
43
68
|
from ifcraftcorpus import Corpus
|
|
44
69
|
|
|
45
70
|
corpus = Corpus()
|
|
71
|
+
logger.info(
|
|
72
|
+
"CLI search query=%r cluster=%s limit=%s",
|
|
73
|
+
_truncate(args.query),
|
|
74
|
+
args.cluster,
|
|
75
|
+
args.limit,
|
|
76
|
+
)
|
|
46
77
|
results = corpus.search(
|
|
47
78
|
args.query,
|
|
48
79
|
limit=args.limit,
|
|
@@ -51,6 +82,7 @@ def cmd_search(args: argparse.Namespace) -> int:
|
|
|
51
82
|
)
|
|
52
83
|
|
|
53
84
|
if not results:
|
|
85
|
+
logger.info("CLI search returned no matches")
|
|
54
86
|
print("No results found.")
|
|
55
87
|
return 0
|
|
56
88
|
|
|
@@ -65,6 +97,7 @@ def cmd_search(args: argparse.Namespace) -> int:
|
|
|
65
97
|
content += "..."
|
|
66
98
|
print(f" {content}")
|
|
67
99
|
|
|
100
|
+
logger.info("CLI search returned %s results", len(results))
|
|
68
101
|
return 0
|
|
69
102
|
|
|
70
103
|
|
|
@@ -77,6 +110,7 @@ def cmd_embeddings_status(args: argparse.Namespace) -> int:
|
|
|
77
110
|
get_embedding_provider,
|
|
78
111
|
)
|
|
79
112
|
|
|
113
|
+
logger.debug("CLI embeddings status requested")
|
|
80
114
|
print("\n=== Embedding Providers ===\n")
|
|
81
115
|
|
|
82
116
|
# Check each provider
|
|
@@ -129,7 +163,7 @@ def cmd_embeddings_build(args: argparse.Namespace) -> int:
|
|
|
129
163
|
)
|
|
130
164
|
|
|
131
165
|
# Get provider
|
|
132
|
-
provider = None
|
|
166
|
+
provider: EmbeddingProvider | None = None
|
|
133
167
|
if args.provider:
|
|
134
168
|
if args.provider == "ollama":
|
|
135
169
|
provider = OllamaEmbeddings(model=args.model, host=args.ollama_host)
|
|
@@ -152,12 +186,19 @@ def cmd_embeddings_build(args: argparse.Namespace) -> int:
|
|
|
152
186
|
print(f"Provider {provider.provider_name} is not available.", file=sys.stderr)
|
|
153
187
|
return 1
|
|
154
188
|
|
|
189
|
+
logger.info(
|
|
190
|
+
"CLI embeddings build provider=%s model=%s output=%s",
|
|
191
|
+
provider.provider_name,
|
|
192
|
+
provider.model,
|
|
193
|
+
args.output,
|
|
194
|
+
)
|
|
155
195
|
print(f"Using provider: {provider.provider_name}")
|
|
156
196
|
print(f"Model: {provider.model} ({provider.dimension}d)")
|
|
157
197
|
|
|
158
198
|
# Build embeddings
|
|
159
199
|
corpus = Corpus()
|
|
160
|
-
|
|
200
|
+
doc_total = corpus.document_count()
|
|
201
|
+
print(f"\nBuilding embeddings for {doc_total} documents...")
|
|
161
202
|
|
|
162
203
|
# Use the corpus's internal index
|
|
163
204
|
embedding_index = EmbeddingIndex(provider=provider)
|
|
@@ -214,6 +255,12 @@ def cmd_embeddings_build(args: argparse.Namespace) -> int:
|
|
|
214
255
|
output_path = Path(args.output)
|
|
215
256
|
embedding_index.save(output_path)
|
|
216
257
|
|
|
258
|
+
logger.info(
|
|
259
|
+
"CLI embeddings build completed docs=%s sections=%s output=%s",
|
|
260
|
+
doc_count,
|
|
261
|
+
section_count,
|
|
262
|
+
output_path,
|
|
263
|
+
)
|
|
217
264
|
print(f"\nDone! Embedded {section_count} sections from {doc_count} documents.")
|
|
218
265
|
print(f"Saved to: {output_path}")
|
|
219
266
|
|
|
@@ -273,7 +320,9 @@ def main() -> int:
|
|
|
273
320
|
emb_parser.print_help()
|
|
274
321
|
return 0
|
|
275
322
|
|
|
276
|
-
|
|
323
|
+
logger.debug("CLI command executed: %s", args.command)
|
|
324
|
+
result: int = args.func(args)
|
|
325
|
+
return result
|
|
277
326
|
|
|
278
327
|
|
|
279
328
|
if __name__ == "__main__":
|
|
@@ -44,10 +44,13 @@ from __future__ import annotations
|
|
|
44
44
|
import json
|
|
45
45
|
import logging
|
|
46
46
|
from pathlib import Path
|
|
47
|
-
from typing import TYPE_CHECKING
|
|
47
|
+
from typing import TYPE_CHECKING, Any
|
|
48
48
|
|
|
49
49
|
import numpy as np
|
|
50
50
|
|
|
51
|
+
if TYPE_CHECKING:
|
|
52
|
+
from sentence_transformers import SentenceTransformer
|
|
53
|
+
|
|
51
54
|
if TYPE_CHECKING:
|
|
52
55
|
from ifcraftcorpus.index import CorpusIndex
|
|
53
56
|
from ifcraftcorpus.providers import EmbeddingProvider
|
|
@@ -107,7 +110,8 @@ class EmbeddingIndex:
|
|
|
107
110
|
"""
|
|
108
111
|
self._provider = provider
|
|
109
112
|
self._embeddings: np.ndarray | None = None
|
|
110
|
-
self._metadata: list[dict] = []
|
|
113
|
+
self._metadata: list[dict[str, Any]] = []
|
|
114
|
+
self._st_model: SentenceTransformer | None = None
|
|
111
115
|
|
|
112
116
|
# For backward compatibility / persistence
|
|
113
117
|
if provider:
|
|
@@ -117,7 +121,6 @@ class EmbeddingIndex:
|
|
|
117
121
|
self.model_name = model_name
|
|
118
122
|
self._provider_name = "sentence-transformers"
|
|
119
123
|
# Lazy-load sentence-transformers model
|
|
120
|
-
self._st_model = None
|
|
121
124
|
if not lazy_load:
|
|
122
125
|
self._load_st_model()
|
|
123
126
|
|
|
@@ -126,7 +129,7 @@ class EmbeddingIndex:
|
|
|
126
129
|
"""Get the provider name."""
|
|
127
130
|
return self._provider_name
|
|
128
131
|
|
|
129
|
-
def _load_st_model(self):
|
|
132
|
+
def _load_st_model(self) -> SentenceTransformer:
|
|
130
133
|
"""Load sentence-transformers model (fallback)."""
|
|
131
134
|
if self._st_model is None:
|
|
132
135
|
try:
|
|
@@ -148,12 +151,13 @@ class EmbeddingIndex:
|
|
|
148
151
|
else:
|
|
149
152
|
# Fallback to sentence-transformers
|
|
150
153
|
model = self._load_st_model()
|
|
151
|
-
|
|
154
|
+
embeddings = model.encode(texts, show_progress_bar=False, convert_to_numpy=True)
|
|
155
|
+
return np.asarray(embeddings)
|
|
152
156
|
|
|
153
157
|
def add_texts(
|
|
154
158
|
self,
|
|
155
159
|
texts: list[str],
|
|
156
|
-
metadata: list[dict],
|
|
160
|
+
metadata: list[dict[str, Any]],
|
|
157
161
|
) -> None:
|
|
158
162
|
"""Add texts with metadata to the index.
|
|
159
163
|
|
|
@@ -185,7 +189,7 @@ class EmbeddingIndex:
|
|
|
185
189
|
*,
|
|
186
190
|
top_k: int = 10,
|
|
187
191
|
cluster: str | None = None,
|
|
188
|
-
) -> list[tuple[dict, float]]:
|
|
192
|
+
) -> list[tuple[dict[str, Any], float]]:
|
|
189
193
|
"""Search for semantically similar texts.
|
|
190
194
|
|
|
191
195
|
Args:
|
|
@@ -48,10 +48,31 @@ from __future__ import annotations
|
|
|
48
48
|
import sqlite3
|
|
49
49
|
from dataclasses import dataclass
|
|
50
50
|
from pathlib import Path
|
|
51
|
+
from typing import Any
|
|
51
52
|
|
|
52
53
|
from ifcraftcorpus.parser import Document, parse_directory
|
|
53
54
|
|
|
54
55
|
|
|
56
|
+
def _sanitize_fts_query(query: str) -> str:
|
|
57
|
+
"""Sanitize a query string for the FTS5 MATCH clause.
|
|
58
|
+
|
|
59
|
+
This function replaces hyphens with spaces to prevent FTS5 from
|
|
60
|
+
interpreting them as the `NOT` operator. This is intended to correctly
|
|
61
|
+
handle natural language queries with hyphenated words, for example
|
|
62
|
+
transforming "haunted-house" into a search for "haunted house".
|
|
63
|
+
|
|
64
|
+
It also collapses any resulting multiple spaces into a single space.
|
|
65
|
+
|
|
66
|
+
Args:
|
|
67
|
+
query: Raw query string from user input.
|
|
68
|
+
|
|
69
|
+
Returns:
|
|
70
|
+
Sanitized query safe for FTS5 MATCH.
|
|
71
|
+
"""
|
|
72
|
+
# Replace hyphens and collapse whitespace in one go.
|
|
73
|
+
return " ".join(query.replace("-", " ").split())
|
|
74
|
+
|
|
75
|
+
|
|
55
76
|
@dataclass
|
|
56
77
|
class SearchResult:
|
|
57
78
|
"""A search result from the corpus FTS5 index.
|
|
@@ -379,8 +400,8 @@ class CorpusIndex:
|
|
|
379
400
|
... cluster="emotional-design",
|
|
380
401
|
... limit=5)
|
|
381
402
|
"""
|
|
382
|
-
# Build FTS5 query
|
|
383
|
-
fts_query = query
|
|
403
|
+
# Build FTS5 query - sanitize to handle special characters
|
|
404
|
+
fts_query = _sanitize_fts_query(query)
|
|
384
405
|
|
|
385
406
|
# Add cluster filter if specified
|
|
386
407
|
where_clause = ""
|
|
@@ -462,7 +483,7 @@ class CorpusIndex:
|
|
|
462
483
|
cursor = self.conn.execute("SELECT DISTINCT cluster FROM documents ORDER BY cluster")
|
|
463
484
|
return [row["cluster"] for row in cursor]
|
|
464
485
|
|
|
465
|
-
def get_document(self, name: str) -> dict | None:
|
|
486
|
+
def get_document(self, name: str) -> dict[str, Any] | None:
|
|
466
487
|
"""Get a document by name with all its sections.
|
|
467
488
|
|
|
468
489
|
Retrieves complete document data including metadata and all
|
|
@@ -535,7 +556,8 @@ class CorpusIndex:
|
|
|
535
556
|
Count of documents in the index.
|
|
536
557
|
"""
|
|
537
558
|
cursor = self.conn.execute("SELECT COUNT(*) FROM documents")
|
|
538
|
-
|
|
559
|
+
result = cursor.fetchone()
|
|
560
|
+
return int(result[0]) if result else 0
|
|
539
561
|
|
|
540
562
|
|
|
541
563
|
def build_index(corpus_dir: Path, output_path: Path) -> CorpusIndex:
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""Shared logging helpers for the IF Craft Corpus codebase."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
import sys
|
|
8
|
+
from typing import Final
|
|
9
|
+
|
|
10
|
+
LOG_LEVEL_ENV: Final[str] = "LOG_LEVEL"
|
|
11
|
+
VERBOSE_ENV: Final[str] = "VERBOSE"
|
|
12
|
+
|
|
13
|
+
__all__ = ["configure_logging", "LOG_LEVEL_ENV", "VERBOSE_ENV"]
|
|
14
|
+
|
|
15
|
+
_TRUTHY_VALUES: Final[set[str]] = {"1", "true", "yes", "on"}
|
|
16
|
+
_configured: bool = False
|
|
17
|
+
_CHATTY_LOGGERS: Final[tuple[str, ...]] = (
|
|
18
|
+
"httpx",
|
|
19
|
+
"fakeredis",
|
|
20
|
+
"docket",
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _is_truthy(value: str | None) -> bool:
    """Return True if the string resembles a truthy flag.

    ``None`` and anything outside the recognized truthy spellings
    (case-insensitive, surrounding whitespace ignored) are falsy.
    """

    return value is not None and value.strip().lower() in _TRUTHY_VALUES
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _resolve_level(value: str | None) -> int | None:
|
|
33
|
+
"""Convert a logging level string (name or integer) to ``int``."""
|
|
34
|
+
|
|
35
|
+
if not value:
|
|
36
|
+
return None
|
|
37
|
+
candidate = value.strip()
|
|
38
|
+
if not candidate:
|
|
39
|
+
return None
|
|
40
|
+
if candidate.isdigit():
|
|
41
|
+
return int(candidate)
|
|
42
|
+
name = candidate.upper()
|
|
43
|
+
return getattr(logging, name, None)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def configure_logging(
    *,
    env_level: str = LOG_LEVEL_ENV,
    env_verbose: str = VERBOSE_ENV,
    fmt: str = "%(asctime)s [%(levelname)s] %(name)s: %(message)s",
) -> int | None:
    """Configure root logging when LOG_LEVEL/VERBOSE are set.

    Reads two environment variables: *env_level* (a level name or
    number, resolved via ``_resolve_level``) and *env_verbose* (a
    truthy flag per ``_is_truthy``). When neither is set, logging is
    left untouched. An unrecognized level value is reported on stderr
    and treated as INFO; a bare VERBOSE flag with no level means DEBUG.

    Args:
        env_level: Name of the environment variable holding the level.
        env_verbose: Name of the environment variable holding the
            verbose flag.
        fmt: Format string passed to ``logging.basicConfig``.

    Returns:
        The configured level when logging is enabled, ``None`` otherwise.
    """

    global _configured

    raw_level = os.getenv(env_level)
    level = _resolve_level(raw_level)
    verbose_flag = os.getenv(env_verbose)

    # The variable was set but didn't resolve to a level: warn on stderr
    # (logging itself isn't configured yet) and fall back to INFO.
    if raw_level and level is None:
        print(
            f"ifcraftcorpus: unknown log level '{raw_level}', defaulting to INFO",
            file=sys.stderr,
        )
        level = logging.INFO

    # Neither env var requests logging: leave the root logger alone.
    if level is None and not _is_truthy(verbose_flag):
        return None

    # VERBOSE was set without an explicit level: maximum verbosity.
    if level is None:
        level = logging.DEBUG

    root = logging.getLogger()
    # Call basicConfig only on the first configuration by this module;
    # afterwards (or when another library already attached handlers AND
    # we configured before) just adjust the root level in place.
    if not (root.handlers and _configured):
        logging.basicConfig(level=level, format=fmt, stream=sys.stderr)
        _configured = True
    root.setLevel(level)

    # Keep known noisy third-party loggers at WARNING or above even in
    # debug mode, so application logs aren't drowned out.
    for name in _CHATTY_LOGGERS:
        logging.getLogger(name).setLevel(max(logging.WARNING, level))
    return level
|