ifcraftcorpus 1.2.0__tar.gz → 1.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/PKG-INFO +18 -1
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/README.md +17 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/pyproject.toml +12 -1
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/src/ifcraftcorpus/cli.py +47 -3
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/src/ifcraftcorpus/index.py +22 -2
- ifcraftcorpus-1.2.1/src/ifcraftcorpus/logging_utils.py +84 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/src/ifcraftcorpus/mcp_server.py +111 -21
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/src/ifcraftcorpus/providers.py +1 -1
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/src/ifcraftcorpus/search.py +47 -3
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/.gitignore +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/LICENSE +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/LICENSE-CONTENT +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/agent-design/agent_prompt_engineering.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/agent-design/multi_agent_patterns.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/audience-and-access/accessibility_guidelines.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/audience-and-access/audience_targeting.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/audience-and-access/localization_considerations.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/audio_visual_integration.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/collaborative_if_writing.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/creative_workflow_pipeline.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/diegetic_design.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/idea_capture_and_hooks.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/if_platform_tools.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/player_analytics_metrics.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/quality_standards_if.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/research_and_verification.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/testing_interactive_fiction.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/emotional-design/conflict_patterns.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/emotional-design/emotional_beats.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/game-design/mechanics_design_patterns.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/genre-conventions/children_and_ya_conventions.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/genre-conventions/fantasy_conventions.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/genre-conventions/historical_fiction.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/genre-conventions/horror_conventions.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/genre-conventions/mystery_conventions.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/genre-conventions/sci_fi_conventions.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/branching_narrative_construction.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/branching_narrative_craft.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/endings_patterns.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/episodic_serialized_if.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/nonlinear_structure.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/pacing_and_tension.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/romance_and_relationships.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/scene_structure_and_beats.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/scene_transitions.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/prose-and-language/character_voice.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/prose-and-language/dialogue_craft.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/prose-and-language/exposition_techniques.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/prose-and-language/narrative_point_of_view.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/prose-and-language/prose_patterns.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/prose-and-language/subtext_and_implication.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/prose-and-language/voice_register_consistency.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/scope-and-planning/scope_and_length.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/world-and-setting/canon_management.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/world-and-setting/setting_as_character.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/world-and-setting/worldbuilding_patterns.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/src/ifcraftcorpus/__init__.py +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/src/ifcraftcorpus/embeddings.py +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/src/ifcraftcorpus/parser.py +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/src/ifcraftcorpus/py.typed +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/subagents/README.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/subagents/if_genre_consultant.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/subagents/if_platform_advisor.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/subagents/if_prose_writer.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/subagents/if_quality_reviewer.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/subagents/if_story_architect.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/subagents/if_world_curator.md +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ifcraftcorpus
|
|
3
|
-
Version: 1.2.0
|
|
3
|
+
Version: 1.2.1
|
|
4
4
|
Summary: Interactive fiction craft corpus with search library and MCP server
|
|
5
5
|
Project-URL: Homepage, https://pvliesdonk.github.io/if-craft-corpus
|
|
6
6
|
Project-URL: Repository, https://github.com/pvliesdonk/if-craft-corpus
|
|
@@ -124,6 +124,23 @@ results = corpus.search(
|
|
|
124
124
|
| agent-design | 2 | Multi-agent patterns, prompt engineering |
|
|
125
125
|
| game-design | 1 | Mechanics design patterns |
|
|
126
126
|
|
|
127
|
+
## Verbose Logging
|
|
128
|
+
|
|
129
|
+
Set `LOG_LEVEL` (e.g., `INFO`, `DEBUG`) or the convenience flag `VERBOSE=1`
|
|
130
|
+
before launching `ifcraftcorpus`, `ifcraftcorpus-mcp`, or the Docker image to
|
|
131
|
+
emit detailed logs to stderr. Example:
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
LOG_LEVEL=DEBUG ifcraftcorpus-mcp
|
|
135
|
+
|
|
136
|
+
# Docker
|
|
137
|
+
docker run -p 8000:8000 \
|
|
138
|
+
-e LOG_LEVEL=DEBUG \
|
|
139
|
+
ghcr.io/pvliesdonk/if-craft-corpus
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
Logs never touch stdout, so stdio transports remain compatible.
|
|
143
|
+
|
|
127
144
|
## Documentation
|
|
128
145
|
|
|
129
146
|
Full documentation: https://pvliesdonk.github.io/if-craft-corpus
|
|
@@ -71,6 +71,23 @@ results = corpus.search(
|
|
|
71
71
|
| agent-design | 2 | Multi-agent patterns, prompt engineering |
|
|
72
72
|
| game-design | 1 | Mechanics design patterns |
|
|
73
73
|
|
|
74
|
+
## Verbose Logging
|
|
75
|
+
|
|
76
|
+
Set `LOG_LEVEL` (e.g., `INFO`, `DEBUG`) or the convenience flag `VERBOSE=1`
|
|
77
|
+
before launching `ifcraftcorpus`, `ifcraftcorpus-mcp`, or the Docker image to
|
|
78
|
+
emit detailed logs to stderr. Example:
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
LOG_LEVEL=DEBUG ifcraftcorpus-mcp
|
|
82
|
+
|
|
83
|
+
# Docker
|
|
84
|
+
docker run -p 8000:8000 \
|
|
85
|
+
-e LOG_LEVEL=DEBUG \
|
|
86
|
+
ghcr.io/pvliesdonk/if-craft-corpus
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Logs never touch stdout, so stdio transports remain compatible.
|
|
90
|
+
|
|
74
91
|
## Documentation
|
|
75
92
|
|
|
76
93
|
Full documentation: https://pvliesdonk.github.io/if-craft-corpus
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "ifcraftcorpus"
|
|
3
|
-
version = "1.2.0"
|
|
3
|
+
version = "1.2.1"
|
|
4
4
|
description = "Interactive fiction craft corpus with search library and MCP server"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license = {text = "MIT"}
|
|
@@ -103,6 +103,17 @@ strict = true
|
|
|
103
103
|
warn_return_any = true
|
|
104
104
|
warn_unused_ignores = true
|
|
105
105
|
|
|
106
|
+
[[tool.mypy.overrides]]
|
|
107
|
+
module = [
|
|
108
|
+
"fastmcp",
|
|
109
|
+
"fastmcp.prompts",
|
|
110
|
+
"mcp.*",
|
|
111
|
+
"sentence_transformers",
|
|
112
|
+
"numpy",
|
|
113
|
+
"httpx",
|
|
114
|
+
]
|
|
115
|
+
ignore_missing_imports = true
|
|
116
|
+
|
|
106
117
|
[tool.pytest.ini_options]
|
|
107
118
|
testpaths = ["tests"]
|
|
108
119
|
addopts = "-v --tb=short"
|
|
@@ -17,6 +17,7 @@ from __future__ import annotations
|
|
|
17
17
|
|
|
18
18
|
import argparse
|
|
19
19
|
import json
|
|
20
|
+
import logging
|
|
20
21
|
import sys
|
|
21
22
|
from pathlib import Path
|
|
22
23
|
from typing import TYPE_CHECKING
|
|
@@ -24,18 +25,38 @@ from typing import TYPE_CHECKING
|
|
|
24
25
|
if TYPE_CHECKING:
|
|
25
26
|
from ifcraftcorpus.providers import EmbeddingProvider
|
|
26
27
|
|
|
28
|
+
from ifcraftcorpus.logging_utils import configure_logging
|
|
29
|
+
|
|
30
|
+
configure_logging()
|
|
31
|
+
logger = logging.getLogger(__name__)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _truncate(value: str, limit: int = 120) -> str:
|
|
35
|
+
"""Shorten long log values to keep CLI logs readable."""
|
|
36
|
+
|
|
37
|
+
if len(value) <= limit:
|
|
38
|
+
return value
|
|
39
|
+
return f"{value[:limit]}..."
|
|
40
|
+
|
|
27
41
|
|
|
28
42
|
def cmd_info(args: argparse.Namespace) -> int:
|
|
29
43
|
"""Show corpus information."""
|
|
30
44
|
from ifcraftcorpus import Corpus, __version__
|
|
31
45
|
|
|
32
46
|
corpus = Corpus()
|
|
47
|
+
clusters = corpus.list_clusters()
|
|
48
|
+
logger.info(
|
|
49
|
+
"CLI info command: version=%s docs=%s clusters=%s",
|
|
50
|
+
__version__,
|
|
51
|
+
corpus.document_count(),
|
|
52
|
+
len(clusters),
|
|
53
|
+
)
|
|
33
54
|
|
|
34
55
|
print(f"\nIF Craft Corpus v{__version__}")
|
|
35
56
|
print(f"Documents: {corpus.document_count()}")
|
|
36
|
-
print(f"Clusters: {len(corpus.list_clusters())}")
|
|
57
|
+
print(f"Clusters: {len(clusters)}")
|
|
37
58
|
print("\nClusters:")
|
|
38
|
-
for cluster in corpus.list_clusters():
|
|
59
|
+
for cluster in clusters:
|
|
39
60
|
docs = [d for d in corpus.list_documents() if d["cluster"] == cluster]
|
|
40
61
|
print(f" {cluster}: {len(docs)} file(s)")
|
|
41
62
|
|
|
@@ -47,6 +68,12 @@ def cmd_search(args: argparse.Namespace) -> int:
|
|
|
47
68
|
from ifcraftcorpus import Corpus
|
|
48
69
|
|
|
49
70
|
corpus = Corpus()
|
|
71
|
+
logger.info(
|
|
72
|
+
"CLI search query=%r cluster=%s limit=%s",
|
|
73
|
+
_truncate(args.query),
|
|
74
|
+
args.cluster,
|
|
75
|
+
args.limit,
|
|
76
|
+
)
|
|
50
77
|
results = corpus.search(
|
|
51
78
|
args.query,
|
|
52
79
|
limit=args.limit,
|
|
@@ -55,6 +82,7 @@ def cmd_search(args: argparse.Namespace) -> int:
|
|
|
55
82
|
)
|
|
56
83
|
|
|
57
84
|
if not results:
|
|
85
|
+
logger.info("CLI search returned no matches")
|
|
58
86
|
print("No results found.")
|
|
59
87
|
return 0
|
|
60
88
|
|
|
@@ -69,6 +97,7 @@ def cmd_search(args: argparse.Namespace) -> int:
|
|
|
69
97
|
content += "..."
|
|
70
98
|
print(f" {content}")
|
|
71
99
|
|
|
100
|
+
logger.info("CLI search returned %s results", len(results))
|
|
72
101
|
return 0
|
|
73
102
|
|
|
74
103
|
|
|
@@ -81,6 +110,7 @@ def cmd_embeddings_status(args: argparse.Namespace) -> int:
|
|
|
81
110
|
get_embedding_provider,
|
|
82
111
|
)
|
|
83
112
|
|
|
113
|
+
logger.debug("CLI embeddings status requested")
|
|
84
114
|
print("\n=== Embedding Providers ===\n")
|
|
85
115
|
|
|
86
116
|
# Check each provider
|
|
@@ -156,12 +186,19 @@ def cmd_embeddings_build(args: argparse.Namespace) -> int:
|
|
|
156
186
|
print(f"Provider {provider.provider_name} is not available.", file=sys.stderr)
|
|
157
187
|
return 1
|
|
158
188
|
|
|
189
|
+
logger.info(
|
|
190
|
+
"CLI embeddings build provider=%s model=%s output=%s",
|
|
191
|
+
provider.provider_name,
|
|
192
|
+
provider.model,
|
|
193
|
+
args.output,
|
|
194
|
+
)
|
|
159
195
|
print(f"Using provider: {provider.provider_name}")
|
|
160
196
|
print(f"Model: {provider.model} ({provider.dimension}d)")
|
|
161
197
|
|
|
162
198
|
# Build embeddings
|
|
163
199
|
corpus = Corpus()
|
|
164
|
-
|
|
200
|
+
doc_total = corpus.document_count()
|
|
201
|
+
print(f"\nBuilding embeddings for {doc_total} documents...")
|
|
165
202
|
|
|
166
203
|
# Use the corpus's internal index
|
|
167
204
|
embedding_index = EmbeddingIndex(provider=provider)
|
|
@@ -218,6 +255,12 @@ def cmd_embeddings_build(args: argparse.Namespace) -> int:
|
|
|
218
255
|
output_path = Path(args.output)
|
|
219
256
|
embedding_index.save(output_path)
|
|
220
257
|
|
|
258
|
+
logger.info(
|
|
259
|
+
"CLI embeddings build completed docs=%s sections=%s output=%s",
|
|
260
|
+
doc_count,
|
|
261
|
+
section_count,
|
|
262
|
+
output_path,
|
|
263
|
+
)
|
|
221
264
|
print(f"\nDone! Embedded {section_count} sections from {doc_count} documents.")
|
|
222
265
|
print(f"Saved to: {output_path}")
|
|
223
266
|
|
|
@@ -277,6 +320,7 @@ def main() -> int:
|
|
|
277
320
|
emb_parser.print_help()
|
|
278
321
|
return 0
|
|
279
322
|
|
|
323
|
+
logger.debug("CLI command executed: %s", args.command)
|
|
280
324
|
result: int = args.func(args)
|
|
281
325
|
return result
|
|
282
326
|
|
|
@@ -53,6 +53,26 @@ from typing import Any
|
|
|
53
53
|
from ifcraftcorpus.parser import Document, parse_directory
|
|
54
54
|
|
|
55
55
|
|
|
56
|
+
def _sanitize_fts_query(query: str) -> str:
|
|
57
|
+
"""Sanitize a query string for the FTS5 MATCH clause.
|
|
58
|
+
|
|
59
|
+
This function replaces hyphens with spaces to prevent FTS5 from
|
|
60
|
+
interpreting them as the `NOT` operator. This is intended to correctly
|
|
61
|
+
handle natural language queries with hyphenated words, for example
|
|
62
|
+
transforming "haunted-house" into a search for "haunted house".
|
|
63
|
+
|
|
64
|
+
It also collapses any resulting multiple spaces into a single space.
|
|
65
|
+
|
|
66
|
+
Args:
|
|
67
|
+
query: Raw query string from user input.
|
|
68
|
+
|
|
69
|
+
Returns:
|
|
70
|
+
Sanitized query safe for FTS5 MATCH.
|
|
71
|
+
"""
|
|
72
|
+
# Replace hyphens and collapse whitespace in one go.
|
|
73
|
+
return " ".join(query.replace("-", " ").split())
|
|
74
|
+
|
|
75
|
+
|
|
56
76
|
@dataclass
|
|
57
77
|
class SearchResult:
|
|
58
78
|
"""A search result from the corpus FTS5 index.
|
|
@@ -380,8 +400,8 @@ class CorpusIndex:
|
|
|
380
400
|
... cluster="emotional-design",
|
|
381
401
|
... limit=5)
|
|
382
402
|
"""
|
|
383
|
-
# Build FTS5 query
|
|
384
|
-
fts_query = query
|
|
403
|
+
# Build FTS5 query - sanitize to handle special characters
|
|
404
|
+
fts_query = _sanitize_fts_query(query)
|
|
385
405
|
|
|
386
406
|
# Add cluster filter if specified
|
|
387
407
|
where_clause = ""
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""Shared logging helpers for the IF Craft Corpus codebase."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
import sys
|
|
8
|
+
from typing import Final
|
|
9
|
+
|
|
10
|
+
LOG_LEVEL_ENV: Final[str] = "LOG_LEVEL"
|
|
11
|
+
VERBOSE_ENV: Final[str] = "VERBOSE"
|
|
12
|
+
|
|
13
|
+
__all__ = ["configure_logging", "LOG_LEVEL_ENV", "VERBOSE_ENV"]
|
|
14
|
+
|
|
15
|
+
_TRUTHY_VALUES: Final[set[str]] = {"1", "true", "yes", "on"}
|
|
16
|
+
_configured: bool = False
|
|
17
|
+
_CHATTY_LOGGERS: Final[tuple[str, ...]] = (
|
|
18
|
+
"httpx",
|
|
19
|
+
"fakeredis",
|
|
20
|
+
"docket",
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _is_truthy(value: str | None) -> bool:
|
|
25
|
+
"""Return True if the string resembles a truthy flag."""
|
|
26
|
+
|
|
27
|
+
if value is None:
|
|
28
|
+
return False
|
|
29
|
+
return value.strip().lower() in _TRUTHY_VALUES
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _resolve_level(value: str | None) -> int | None:
|
|
33
|
+
"""Convert a logging level string (name or integer) to ``int``."""
|
|
34
|
+
|
|
35
|
+
if not value:
|
|
36
|
+
return None
|
|
37
|
+
candidate = value.strip()
|
|
38
|
+
if not candidate:
|
|
39
|
+
return None
|
|
40
|
+
if candidate.isdigit():
|
|
41
|
+
return int(candidate)
|
|
42
|
+
name = candidate.upper()
|
|
43
|
+
return getattr(logging, name, None)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def configure_logging(
|
|
47
|
+
*,
|
|
48
|
+
env_level: str = LOG_LEVEL_ENV,
|
|
49
|
+
env_verbose: str = VERBOSE_ENV,
|
|
50
|
+
fmt: str = "%(asctime)s [%(levelname)s] %(name)s: %(message)s",
|
|
51
|
+
) -> int | None:
|
|
52
|
+
"""Configure root logging when LOG_LEVEL/VERBOSE are set.
|
|
53
|
+
|
|
54
|
+
Returns the configured level when logging is enabled, ``None`` otherwise.
|
|
55
|
+
"""
|
|
56
|
+
|
|
57
|
+
global _configured
|
|
58
|
+
|
|
59
|
+
raw_level = os.getenv(env_level)
|
|
60
|
+
level = _resolve_level(raw_level)
|
|
61
|
+
verbose_flag = os.getenv(env_verbose)
|
|
62
|
+
|
|
63
|
+
if raw_level and level is None:
|
|
64
|
+
print(
|
|
65
|
+
f"ifcraftcorpus: unknown log level '{raw_level}', defaulting to INFO",
|
|
66
|
+
file=sys.stderr,
|
|
67
|
+
)
|
|
68
|
+
level = logging.INFO
|
|
69
|
+
|
|
70
|
+
if level is None and not _is_truthy(verbose_flag):
|
|
71
|
+
return None
|
|
72
|
+
|
|
73
|
+
if level is None:
|
|
74
|
+
level = logging.DEBUG
|
|
75
|
+
|
|
76
|
+
root = logging.getLogger()
|
|
77
|
+
if not (root.handlers and _configured):
|
|
78
|
+
logging.basicConfig(level=level, format=fmt, stream=sys.stderr)
|
|
79
|
+
_configured = True
|
|
80
|
+
root.setLevel(level)
|
|
81
|
+
|
|
82
|
+
for name in _CHATTY_LOGGERS:
|
|
83
|
+
logging.getLogger(name).setLevel(max(logging.WARNING, level))
|
|
84
|
+
return level
|
|
@@ -64,17 +64,33 @@ Prompts:
|
|
|
64
64
|
|
|
65
65
|
from __future__ import annotations
|
|
66
66
|
|
|
67
|
+
import logging
|
|
67
68
|
import os
|
|
68
69
|
import sys
|
|
70
|
+
from collections.abc import Callable
|
|
69
71
|
from pathlib import Path
|
|
70
|
-
from typing import Any, Literal
|
|
72
|
+
from typing import TYPE_CHECKING, Any, Literal, TypeVar
|
|
71
73
|
|
|
72
74
|
from fastmcp import FastMCP
|
|
73
75
|
from fastmcp.prompts import Message
|
|
74
76
|
from mcp.types import PromptMessage
|
|
75
77
|
|
|
78
|
+
from ifcraftcorpus.logging_utils import configure_logging
|
|
76
79
|
from ifcraftcorpus.search import Corpus
|
|
77
80
|
|
|
81
|
+
_CONFIGURED_LOG_LEVEL = configure_logging()
|
|
82
|
+
logger = logging.getLogger(__name__)
|
|
83
|
+
if _CONFIGURED_LOG_LEVEL is not None:
|
|
84
|
+
logger.info("MCP logging enabled at %s", logging.getLevelName(_CONFIGURED_LOG_LEVEL))
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _truncate(value: str, limit: int = 200) -> str:
|
|
88
|
+
"""Truncate long strings for safe structured logging."""
|
|
89
|
+
|
|
90
|
+
if len(value) <= limit:
|
|
91
|
+
return value
|
|
92
|
+
return f"{value[:limit]}..."
|
|
93
|
+
|
|
78
94
|
|
|
79
95
|
def _get_subagents_dir() -> Path:
|
|
80
96
|
"""Get the path to the subagents directory.
|
|
@@ -88,15 +104,19 @@ def _get_subagents_dir() -> Path:
|
|
|
88
104
|
# We're in a virtual environment
|
|
89
105
|
installed_path = Path(sys.prefix) / "share" / "ifcraftcorpus" / "subagents"
|
|
90
106
|
if installed_path.exists():
|
|
107
|
+
logger.debug("Using installed subagents directory: %s", installed_path)
|
|
91
108
|
return installed_path
|
|
92
109
|
|
|
93
110
|
# Try development location (relative to this file)
|
|
94
111
|
dev_path = Path(__file__).parent.parent.parent.parent / "subagents"
|
|
95
112
|
if dev_path.exists():
|
|
113
|
+
logger.debug("Using development subagents directory: %s", dev_path)
|
|
96
114
|
return dev_path
|
|
97
115
|
|
|
98
116
|
# Fallback to current directory
|
|
99
|
-
|
|
117
|
+
fallback = Path("subagents")
|
|
118
|
+
logger.debug("Using fallback subagents directory: %s", fallback)
|
|
119
|
+
return fallback
|
|
100
120
|
|
|
101
121
|
|
|
102
122
|
def _load_subagent_template(name: str) -> str:
|
|
@@ -114,7 +134,9 @@ def _load_subagent_template(name: str) -> str:
|
|
|
114
134
|
subagents_dir = _get_subagents_dir()
|
|
115
135
|
template_path = subagents_dir / f"{name}.md"
|
|
116
136
|
if not template_path.exists():
|
|
137
|
+
logger.error("Subagent template missing: %s", template_path)
|
|
117
138
|
raise FileNotFoundError(f"Subagent template not found: {template_path}")
|
|
139
|
+
logger.debug("Loaded subagent template %s", template_path.name)
|
|
118
140
|
return template_path.read_text(encoding="utf-8")
|
|
119
141
|
|
|
120
142
|
|
|
@@ -129,6 +151,16 @@ mcp = FastMCP(
|
|
|
129
151
|
""",
|
|
130
152
|
)
|
|
131
153
|
|
|
154
|
+
if TYPE_CHECKING:
|
|
155
|
+
TCallable = TypeVar("TCallable", bound=Callable[..., Any])
|
|
156
|
+
|
|
157
|
+
def tool(func: TCallable, /) -> TCallable: ...
|
|
158
|
+
|
|
159
|
+
def prompt(*args: Any, **kwargs: Any) -> Callable[[TCallable], TCallable]: ...
|
|
160
|
+
else: # pragma: no cover - runtime aliases for decorators
|
|
161
|
+
tool = mcp.tool
|
|
162
|
+
prompt = mcp.prompt
|
|
163
|
+
|
|
132
164
|
# Global corpus instance (initialized on first use)
|
|
133
165
|
_corpus: Corpus | None = None
|
|
134
166
|
|
|
@@ -144,11 +176,12 @@ def get_corpus() -> Corpus:
|
|
|
144
176
|
"""
|
|
145
177
|
global _corpus
|
|
146
178
|
if _corpus is None:
|
|
179
|
+
logger.info("Initializing shared Corpus instance for MCP server")
|
|
147
180
|
_corpus = Corpus()
|
|
148
181
|
return _corpus
|
|
149
182
|
|
|
150
183
|
|
|
151
|
-
@
|
|
184
|
+
@tool
|
|
152
185
|
def search_corpus(
|
|
153
186
|
query: str,
|
|
154
187
|
cluster: str | None = None,
|
|
@@ -175,8 +208,21 @@ def search_corpus(
|
|
|
175
208
|
"""
|
|
176
209
|
limit = max(1, min(20, limit))
|
|
177
210
|
|
|
211
|
+
logger.debug(
|
|
212
|
+
"search_corpus(query=%r, cluster=%s, limit=%s)",
|
|
213
|
+
_truncate(query),
|
|
214
|
+
cluster,
|
|
215
|
+
limit,
|
|
216
|
+
)
|
|
217
|
+
|
|
178
218
|
corpus = get_corpus()
|
|
179
|
-
|
|
219
|
+
try:
|
|
220
|
+
results = corpus.search(query, cluster=cluster, limit=limit)
|
|
221
|
+
except Exception: # pragma: no cover - defensive logging
|
|
222
|
+
logger.exception("search_corpus failed")
|
|
223
|
+
raise
|
|
224
|
+
|
|
225
|
+
logger.debug("search_corpus returning %s results", len(results))
|
|
180
226
|
|
|
181
227
|
return [
|
|
182
228
|
{
|
|
@@ -190,7 +236,7 @@ def search_corpus(
|
|
|
190
236
|
]
|
|
191
237
|
|
|
192
238
|
|
|
193
|
-
@
|
|
239
|
+
@tool
|
|
194
240
|
def get_document(name: str) -> dict[str, Any] | None:
|
|
195
241
|
"""Get a specific document from the IF Craft Corpus.
|
|
196
242
|
|
|
@@ -204,11 +250,17 @@ def get_document(name: str) -> dict[str, Any] | None:
|
|
|
204
250
|
Returns:
|
|
205
251
|
Full document with title, summary, cluster, topics, and all sections.
|
|
206
252
|
"""
|
|
253
|
+
logger.debug("get_document(%s)", name)
|
|
207
254
|
corpus = get_corpus()
|
|
208
|
-
|
|
255
|
+
document = corpus.get_document(name)
|
|
256
|
+
if document is None:
|
|
257
|
+
logger.info("Document not found: %s", name)
|
|
258
|
+
else:
|
|
259
|
+
logger.debug("Document %s retrieved", name)
|
|
260
|
+
return document
|
|
209
261
|
|
|
210
262
|
|
|
211
|
-
@
|
|
263
|
+
@tool
|
|
212
264
|
def list_documents(cluster: str | None = None) -> list[dict[str, Any]]:
|
|
213
265
|
"""List all documents in the IF Craft Corpus.
|
|
214
266
|
|
|
@@ -220,16 +272,18 @@ def list_documents(cluster: str | None = None) -> list[dict[str, Any]]:
|
|
|
220
272
|
Returns:
|
|
221
273
|
List of documents with name, title, cluster, and topics.
|
|
222
274
|
"""
|
|
275
|
+
logger.debug("list_documents(cluster=%s)", cluster)
|
|
223
276
|
corpus = get_corpus()
|
|
224
277
|
docs = corpus.list_documents()
|
|
225
278
|
|
|
226
279
|
if cluster:
|
|
227
280
|
docs = [d for d in docs if d["cluster"] == cluster]
|
|
228
281
|
|
|
282
|
+
logger.debug("list_documents returning %s entries", len(docs))
|
|
229
283
|
return docs
|
|
230
284
|
|
|
231
285
|
|
|
232
|
-
@
|
|
286
|
+
@tool
|
|
233
287
|
def list_clusters() -> list[dict[str, Any]]:
|
|
234
288
|
"""List all topic clusters in the IF Craft Corpus.
|
|
235
289
|
|
|
@@ -239,6 +293,7 @@ def list_clusters() -> list[dict[str, Any]]:
|
|
|
239
293
|
Returns:
|
|
240
294
|
List of clusters with names and document counts.
|
|
241
295
|
"""
|
|
296
|
+
logger.debug("list_clusters invoked")
|
|
242
297
|
corpus = get_corpus()
|
|
243
298
|
clusters = corpus.list_clusters()
|
|
244
299
|
docs = corpus.list_documents()
|
|
@@ -249,26 +304,36 @@ def list_clusters() -> list[dict[str, Any]]:
|
|
|
249
304
|
c = d["cluster"]
|
|
250
305
|
counts[c] = counts.get(c, 0) + 1
|
|
251
306
|
|
|
252
|
-
|
|
307
|
+
cluster_info = [{"name": c, "document_count": counts.get(c, 0)} for c in clusters]
|
|
308
|
+
logger.debug("list_clusters returning %s clusters", len(cluster_info))
|
|
309
|
+
return cluster_info
|
|
253
310
|
|
|
254
311
|
|
|
255
|
-
@
|
|
312
|
+
@tool
|
|
256
313
|
def corpus_stats() -> dict[str, Any]:
|
|
257
314
|
"""Get statistics about the IF Craft Corpus.
|
|
258
315
|
|
|
259
316
|
Returns:
|
|
260
317
|
Statistics including document count, cluster count, and availability.
|
|
261
318
|
"""
|
|
319
|
+
logger.debug("corpus_stats invoked")
|
|
262
320
|
corpus = get_corpus()
|
|
263
|
-
|
|
321
|
+
stats = {
|
|
264
322
|
"document_count": corpus.document_count(),
|
|
265
323
|
"cluster_count": len(corpus.list_clusters()),
|
|
266
324
|
"clusters": corpus.list_clusters(),
|
|
267
325
|
"semantic_search_available": corpus.has_semantic_search,
|
|
268
326
|
}
|
|
327
|
+
logger.debug(
|
|
328
|
+
"corpus_stats: docs=%s clusters=%s semantic=%s",
|
|
329
|
+
stats["document_count"],
|
|
330
|
+
stats["cluster_count"],
|
|
331
|
+
stats["semantic_search_available"],
|
|
332
|
+
)
|
|
333
|
+
return stats
|
|
269
334
|
|
|
270
335
|
|
|
271
|
-
@
|
|
336
|
+
@tool
|
|
272
337
|
def embeddings_status() -> dict[str, Any]:
|
|
273
338
|
"""Get status of embedding providers and index.
|
|
274
339
|
|
|
@@ -278,6 +343,7 @@ def embeddings_status() -> dict[str, Any]:
|
|
|
278
343
|
Returns:
|
|
279
344
|
Dict with provider availability and embedding index status.
|
|
280
345
|
"""
|
|
346
|
+
logger.debug("embeddings_status invoked")
|
|
281
347
|
result: dict[str, Any] = {
|
|
282
348
|
"semantic_search_available": get_corpus().has_semantic_search,
|
|
283
349
|
"providers": {},
|
|
@@ -306,13 +372,15 @@ def embeddings_status() -> dict[str, Any]:
|
|
|
306
372
|
"model": provider.model if available else None,
|
|
307
373
|
"dimension": provider.dimension if available else None,
|
|
308
374
|
}
|
|
309
|
-
except Exception:
|
|
375
|
+
except Exception as exc: # pragma: no cover - defensive logging
|
|
376
|
+
logger.warning("Failed to inspect embedding provider %s: %s", name, exc)
|
|
310
377
|
result["providers"][name] = {"available": False, "error": "import_failed"}
|
|
311
378
|
|
|
312
379
|
# Auto-detect best provider
|
|
313
380
|
auto = get_embedding_provider()
|
|
314
381
|
result["auto_detected_provider"] = auto.provider_name if auto else None
|
|
315
382
|
except ImportError:
|
|
383
|
+
logger.warning("Embedding providers module not importable for status call")
|
|
316
384
|
result["providers_error"] = "providers module not available"
|
|
317
385
|
|
|
318
386
|
# Check for saved embeddings
|
|
@@ -329,10 +397,16 @@ def embeddings_status() -> dict[str, Any]:
|
|
|
329
397
|
"count": len(meta.get("metadata", [])),
|
|
330
398
|
}
|
|
331
399
|
|
|
400
|
+
logger.debug(
|
|
401
|
+
"embeddings_status semantic=%s providers=%s saved=%s",
|
|
402
|
+
result["semantic_search_available"],
|
|
403
|
+
list(result["providers"].keys()),
|
|
404
|
+
bool(result["saved_embeddings"]),
|
|
405
|
+
)
|
|
332
406
|
return result
|
|
333
407
|
|
|
334
408
|
|
|
335
|
-
@
|
|
409
|
+
@tool
|
|
336
410
|
def build_embeddings(
|
|
337
411
|
provider: str | None = None,
|
|
338
412
|
force: bool = False,
|
|
@@ -358,6 +432,8 @@ def build_embeddings(
|
|
|
358
432
|
"""
|
|
359
433
|
global _corpus
|
|
360
434
|
|
|
435
|
+
logger.info("build_embeddings requested provider=%s force=%s", provider, force)
|
|
436
|
+
|
|
361
437
|
try:
|
|
362
438
|
from ifcraftcorpus.providers import (
|
|
363
439
|
OllamaEmbeddings,
|
|
@@ -366,6 +442,7 @@ def build_embeddings(
|
|
|
366
442
|
get_embedding_provider,
|
|
367
443
|
)
|
|
368
444
|
except ImportError:
|
|
445
|
+
logger.warning("Embedding provider modules not installed")
|
|
369
446
|
return {
|
|
370
447
|
"error": "Embedding providers not available. "
|
|
371
448
|
"Install with [embeddings-api] or [embeddings] extras."
|
|
@@ -380,6 +457,7 @@ def build_embeddings(
|
|
|
380
457
|
"sentence_transformers": SentenceTransformersEmbeddings,
|
|
381
458
|
}
|
|
382
459
|
if provider not in provider_map:
|
|
460
|
+
logger.warning("Unknown embeddings provider requested: %s", provider)
|
|
383
461
|
return {
|
|
384
462
|
"error": f"Unknown provider: {provider}. Use: ollama, openai, sentence_transformers"
|
|
385
463
|
}
|
|
@@ -388,12 +466,14 @@ def build_embeddings(
|
|
|
388
466
|
embedding_provider = get_embedding_provider()
|
|
389
467
|
|
|
390
468
|
if not embedding_provider:
|
|
469
|
+
logger.warning("No embedding provider available for build request")
|
|
391
470
|
return {
|
|
392
471
|
"error": "No embedding provider available. "
|
|
393
472
|
"Configure Ollama, set OPENAI_API_KEY, or install sentence-transformers."
|
|
394
473
|
}
|
|
395
474
|
|
|
396
475
|
if not embedding_provider.check_availability():
|
|
476
|
+
logger.warning("Embedding provider %s unavailable", embedding_provider.provider_name)
|
|
397
477
|
return {"error": f"Provider {embedding_provider.provider_name} is not available."}
|
|
398
478
|
|
|
399
479
|
# Configure paths
|
|
@@ -401,6 +481,7 @@ def build_embeddings(
|
|
|
401
481
|
|
|
402
482
|
# Check if already exists
|
|
403
483
|
if not force and embeddings_path.exists() and (embeddings_path / "metadata.json").exists():
|
|
484
|
+
logger.info("Embedding build skipped; existing index at %s", embeddings_path)
|
|
404
485
|
return {
|
|
405
486
|
"status": "skipped",
|
|
406
487
|
"message": "Embeddings already exist. Use force=True to rebuild.",
|
|
@@ -415,6 +496,12 @@ def build_embeddings(
|
|
|
415
496
|
|
|
416
497
|
# Build embeddings
|
|
417
498
|
count = corpus.build_embeddings(force=force)
|
|
499
|
+
logger.info(
|
|
500
|
+
"Embedding build complete items=%s provider=%s model=%s",
|
|
501
|
+
count,
|
|
502
|
+
embedding_provider.provider_name,
|
|
503
|
+
embedding_provider.model,
|
|
504
|
+
)
|
|
418
505
|
|
|
419
506
|
# Update global corpus to use new embeddings
|
|
420
507
|
_corpus = Corpus(embeddings_path=embeddings_path)
|
|
@@ -436,7 +523,7 @@ def build_embeddings(
|
|
|
436
523
|
# Each agent has a specific role in the IF creation workflow.
|
|
437
524
|
|
|
438
525
|
|
|
439
|
-
@
|
|
526
|
+
@prompt(
|
|
440
527
|
name="if_story_architect",
|
|
441
528
|
description="System prompt for an IF Story Architect - an orchestrator agent that "
|
|
442
529
|
"plans narrative structure, decomposes IF projects, and coordinates creation.",
|
|
@@ -470,7 +557,7 @@ def if_story_architect_prompt(
|
|
|
470
557
|
return [Message(template, role="user")]
|
|
471
558
|
|
|
472
559
|
|
|
473
|
-
@
|
|
560
|
+
@prompt(
|
|
474
561
|
name="if_prose_writer",
|
|
475
562
|
description="System prompt for an IF Prose Writer - a specialist agent that "
|
|
476
563
|
"creates narrative content including prose, dialogue, and scene text.",
|
|
@@ -504,7 +591,7 @@ def if_prose_writer_prompt(
|
|
|
504
591
|
return [Message(template, role="user")]
|
|
505
592
|
|
|
506
593
|
|
|
507
|
-
@
|
|
594
|
+
@prompt(
|
|
508
595
|
name="if_quality_reviewer",
|
|
509
596
|
description="System prompt for an IF Quality Reviewer - a validator agent that "
|
|
510
597
|
"reviews IF content for craft quality, consistency, and standards compliance.",
|
|
@@ -530,7 +617,7 @@ def if_quality_reviewer_prompt(
|
|
|
530
617
|
return [Message(template, role="user")]
|
|
531
618
|
|
|
532
619
|
|
|
533
|
-
@
|
|
620
|
+
@prompt(
|
|
534
621
|
name="if_genre_consultant",
|
|
535
622
|
description="System prompt for an IF Genre Consultant - a researcher agent that "
|
|
536
623
|
"provides genre-specific guidance on conventions, tropes, and reader expectations.",
|
|
@@ -563,7 +650,7 @@ def if_genre_consultant_prompt(
|
|
|
563
650
|
return [Message(template, role="user")]
|
|
564
651
|
|
|
565
652
|
|
|
566
|
-
@
|
|
653
|
+
@prompt(
|
|
567
654
|
name="if_world_curator",
|
|
568
655
|
description="System prompt for an IF World Curator - a curator agent that "
|
|
569
656
|
"maintains world consistency, manages canon, and ensures setting coherence.",
|
|
@@ -596,7 +683,7 @@ def if_world_curator_prompt(
|
|
|
596
683
|
return [Message(template, role="user")]
|
|
597
684
|
|
|
598
685
|
|
|
599
|
-
@
|
|
686
|
+
@prompt(
|
|
600
687
|
name="if_platform_advisor",
|
|
601
688
|
description="System prompt for an IF Platform Advisor - a researcher agent that "
|
|
602
689
|
"provides guidance on tools, platforms, and technical implementation.",
|
|
@@ -629,7 +716,7 @@ def if_platform_advisor_prompt(
|
|
|
629
716
|
return [Message(template, role="user")]
|
|
630
717
|
|
|
631
718
|
|
|
632
|
-
@
|
|
719
|
+
@tool
|
|
633
720
|
def list_subagents() -> list[dict[str, Any]]:
|
|
634
721
|
"""List all available IF subagent prompts.
|
|
635
722
|
|
|
@@ -639,6 +726,7 @@ def list_subagents() -> list[dict[str, Any]]:
|
|
|
639
726
|
Returns:
|
|
640
727
|
List of subagents with name, description, and parameters.
|
|
641
728
|
"""
|
|
729
|
+
logger.debug("list_subagents invoked")
|
|
642
730
|
return [
|
|
643
731
|
{
|
|
644
732
|
"name": "if_story_architect",
|
|
@@ -706,8 +794,10 @@ def run_server(
|
|
|
706
794
|
>>> run_server(transport="http", host="0.0.0.0", port=8080)
|
|
707
795
|
"""
|
|
708
796
|
if transport == "http":
|
|
797
|
+
logger.info("Starting MCP server (http) host=%s port=%s", host, port)
|
|
709
798
|
mcp.run(transport="http", host=host, port=port)
|
|
710
799
|
else:
|
|
800
|
+
logger.info("Starting MCP server (stdio)")
|
|
711
801
|
mcp.run()
|
|
712
802
|
|
|
713
803
|
|
|
@@ -283,7 +283,7 @@ class OpenAIEmbeddings(EmbeddingProvider):
|
|
|
283
283
|
"https://api.openai.com/v1/models",
|
|
284
284
|
headers={"Authorization": f"Bearer {self._api_key}"},
|
|
285
285
|
)
|
|
286
|
-
return response.status_code == 200
|
|
286
|
+
return bool(response.status_code == 200)
|
|
287
287
|
|
|
288
288
|
except httpx.RequestError as e:
|
|
289
289
|
logger.debug(f"OpenAI availability check failed: {e}")
|
|
@@ -40,12 +40,24 @@ Classes:
|
|
|
40
40
|
|
|
41
41
|
from __future__ import annotations
|
|
42
42
|
|
|
43
|
+
import logging
|
|
43
44
|
from dataclasses import dataclass
|
|
44
45
|
from pathlib import Path
|
|
45
46
|
from typing import TYPE_CHECKING, Any, Literal
|
|
46
47
|
|
|
47
48
|
from ifcraftcorpus.index import CorpusIndex
|
|
48
49
|
|
|
50
|
+
logger = logging.getLogger(__name__)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _truncate(value: str, limit: int = 120) -> str:
|
|
54
|
+
"""Trim long query strings for readable logging."""
|
|
55
|
+
|
|
56
|
+
if len(value) <= limit:
|
|
57
|
+
return value
|
|
58
|
+
return f"{value[:limit]}..."
|
|
59
|
+
|
|
60
|
+
|
|
49
61
|
if TYPE_CHECKING:
|
|
50
62
|
from ifcraftcorpus.embeddings import EmbeddingIndex
|
|
51
63
|
from ifcraftcorpus.providers import EmbeddingProvider
|
|
@@ -193,6 +205,14 @@ class Corpus:
|
|
|
193
205
|
self._fts_index: CorpusIndex | None = None
|
|
194
206
|
self._embedding_index: EmbeddingIndex | None = None # Lazy loaded
|
|
195
207
|
|
|
208
|
+
logger.debug(
|
|
209
|
+
"Corpus init corpus_dir=%s index_path=%s embeddings_path=%s use_bundled=%s",
|
|
210
|
+
corpus_dir,
|
|
211
|
+
index_path,
|
|
212
|
+
embeddings_path,
|
|
213
|
+
use_bundled,
|
|
214
|
+
)
|
|
215
|
+
|
|
196
216
|
def _get_corpus_dir(self) -> Path:
|
|
197
217
|
"""Get the corpus directory path.
|
|
198
218
|
|
|
@@ -203,6 +223,7 @@ class Corpus:
|
|
|
203
223
|
ValueError: If no corpus directory can be found.
|
|
204
224
|
"""
|
|
205
225
|
if self._corpus_dir:
|
|
226
|
+
logger.debug("Using provided corpus directory: %s", self._corpus_dir)
|
|
206
227
|
return self._corpus_dir
|
|
207
228
|
|
|
208
229
|
if self._use_bundled:
|
|
@@ -215,15 +236,17 @@ class Corpus:
|
|
|
215
236
|
# Check for installed shared data (pip install)
|
|
216
237
|
bundled = Path(sys.prefix) / "share" / "ifcraftcorpus" / "corpus"
|
|
217
238
|
if bundled.exists():
|
|
239
|
+
logger.debug("Using bundled corpus directory: %s", bundled)
|
|
218
240
|
return bundled
|
|
219
241
|
|
|
220
242
|
# Check relative to package (development mode / editable install)
|
|
221
243
|
pkg_dir = Path(ifcraftcorpus.__file__).parent
|
|
222
244
|
dev_corpus = pkg_dir.parent.parent / "corpus"
|
|
223
245
|
if dev_corpus.exists():
|
|
246
|
+
logger.debug("Using development corpus directory: %s", dev_corpus)
|
|
224
247
|
return dev_corpus
|
|
225
248
|
except Exception:
|
|
226
|
-
|
|
249
|
+
logger.debug("Failed to auto-detect bundled corpus directory", exc_info=True)
|
|
227
250
|
|
|
228
251
|
raise ValueError(
|
|
229
252
|
"No corpus directory found. Provide corpus_dir or install package with bundled corpus."
|
|
@@ -240,11 +263,13 @@ class Corpus:
|
|
|
240
263
|
"""
|
|
241
264
|
if self._fts_index is None:
|
|
242
265
|
if self._index_path and self._index_path.exists():
|
|
266
|
+
logger.debug("Loading corpus index from %s", self._index_path)
|
|
243
267
|
self._fts_index = CorpusIndex(self._index_path)
|
|
244
268
|
else:
|
|
245
269
|
# Build in-memory index
|
|
246
|
-
self._fts_index = CorpusIndex()
|
|
247
270
|
corpus_dir = self._get_corpus_dir()
|
|
271
|
+
logger.debug("Building in-memory corpus index from %s", corpus_dir)
|
|
272
|
+
self._fts_index = CorpusIndex()
|
|
248
273
|
self._fts_index.build_from_directory(corpus_dir)
|
|
249
274
|
return self._fts_index
|
|
250
275
|
|
|
@@ -259,6 +284,7 @@ class Corpus:
|
|
|
259
284
|
EmbeddingIndex instance or None if unavailable.
|
|
260
285
|
"""
|
|
261
286
|
if self._embedding_index is None and self._embeddings_path:
|
|
287
|
+
logger.debug("Attempting to load embeddings from %s", self._embeddings_path)
|
|
262
288
|
try:
|
|
263
289
|
from ifcraftcorpus.embeddings import EmbeddingIndex
|
|
264
290
|
|
|
@@ -270,7 +296,9 @@ class Corpus:
|
|
|
270
296
|
self._embeddings_path, provider=self._embedding_provider
|
|
271
297
|
)
|
|
272
298
|
except ImportError:
|
|
273
|
-
|
|
299
|
+
logger.debug("Embedding support not installed", exc_info=True)
|
|
300
|
+
elif self._embedding_index is None and not self._embeddings_path:
|
|
301
|
+
logger.debug("No embeddings path configured; semantic search disabled")
|
|
274
302
|
return self._embedding_index
|
|
275
303
|
|
|
276
304
|
def build_embeddings(self, *, force: bool = False) -> int:
|
|
@@ -310,12 +338,14 @@ class Corpus:
|
|
|
310
338
|
and self._embeddings_path.exists()
|
|
311
339
|
and (self._embeddings_path / "metadata.json").exists()
|
|
312
340
|
):
|
|
341
|
+
logger.info("Embeddings already exist at %s; skipping rebuild", self._embeddings_path)
|
|
313
342
|
return 0
|
|
314
343
|
|
|
315
344
|
from ifcraftcorpus.embeddings import EmbeddingIndex
|
|
316
345
|
|
|
317
346
|
embedding_index = EmbeddingIndex(provider=self._embedding_provider)
|
|
318
347
|
|
|
348
|
+
logger.info("Building embeddings into %s", self._embeddings_path)
|
|
319
349
|
count = 0
|
|
320
350
|
for doc_info in self.list_documents():
|
|
321
351
|
doc = self.get_document(doc_info["name"])
|
|
@@ -360,6 +390,7 @@ class Corpus:
|
|
|
360
390
|
embedding_index.save(self._embeddings_path)
|
|
361
391
|
self._embedding_index = embedding_index
|
|
362
392
|
|
|
393
|
+
logger.info("Saved embeddings (%s items) to %s", count, self._embeddings_path)
|
|
363
394
|
return count
|
|
364
395
|
|
|
365
396
|
def search(
|
|
@@ -408,6 +439,14 @@ class Corpus:
|
|
|
408
439
|
>>> # Semantic search (if embeddings available)
|
|
409
440
|
>>> results = corpus.search("scary atmosphere", mode="semantic")
|
|
410
441
|
"""
|
|
442
|
+
logger.debug(
|
|
443
|
+
"Corpus.search query=%r cluster=%s limit=%s mode=%s",
|
|
444
|
+
_truncate(query),
|
|
445
|
+
cluster,
|
|
446
|
+
limit,
|
|
447
|
+
mode,
|
|
448
|
+
)
|
|
449
|
+
|
|
411
450
|
results: list[CorpusResult] = []
|
|
412
451
|
|
|
413
452
|
if mode in ("keyword", "hybrid"):
|
|
@@ -458,6 +497,11 @@ class Corpus:
|
|
|
458
497
|
unique_results.append(result)
|
|
459
498
|
results = unique_results[:limit]
|
|
460
499
|
|
|
500
|
+
logger.debug(
|
|
501
|
+
"Corpus.search returning %s results (mode=%s)",
|
|
502
|
+
len(results),
|
|
503
|
+
mode,
|
|
504
|
+
)
|
|
461
505
|
return results
|
|
462
506
|
|
|
463
507
|
def get_document(self, name: str) -> dict[str, Any] | None:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/audience-and-access/accessibility_guidelines.md
RENAMED
|
File without changes
|
{ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/audience-and-access/audience_targeting.md
RENAMED
|
File without changes
|
|
File without changes
|
{ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/audio_visual_integration.md
RENAMED
|
File without changes
|
{ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/collaborative_if_writing.md
RENAMED
|
File without changes
|
{ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/creative_workflow_pipeline.md
RENAMED
|
File without changes
|
|
File without changes
|
{ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/idea_capture_and_hooks.md
RENAMED
|
File without changes
|
|
File without changes
|
{ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/player_analytics_metrics.md
RENAMED
|
File without changes
|
{ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/quality_standards_if.md
RENAMED
|
File without changes
|
{ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/research_and_verification.md
RENAMED
|
File without changes
|
{ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/craft-foundations/testing_interactive_fiction.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/genre-conventions/children_and_ya_conventions.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/branching_narrative_craft.md
RENAMED
|
File without changes
|
|
File without changes
|
{ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/episodic_serialized_if.md
RENAMED
|
File without changes
|
{ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/nonlinear_structure.md
RENAMED
|
File without changes
|
{ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/pacing_and_tension.md
RENAMED
|
File without changes
|
{ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/romance_and_relationships.md
RENAMED
|
File without changes
|
{ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/narrative-structure/scene_structure_and_beats.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/prose-and-language/exposition_techniques.md
RENAMED
|
File without changes
|
{ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/prose-and-language/narrative_point_of_view.md
RENAMED
|
File without changes
|
|
File without changes
|
{ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/prose-and-language/subtext_and_implication.md
RENAMED
|
File without changes
|
{ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/prose-and-language/voice_register_consistency.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/world-and-setting/setting_as_character.md
RENAMED
|
File without changes
|
{ifcraftcorpus-1.2.0 → ifcraftcorpus-1.2.1}/corpus/world-and-setting/worldbuilding_patterns.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|