ifcraftcorpus 1.2.1__tar.gz → 1.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/PKG-INFO +1 -1
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/pyproject.toml +1 -1
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/src/ifcraftcorpus/cli.py +21 -5
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/src/ifcraftcorpus/index.py +120 -43
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/src/ifcraftcorpus/mcp_server.py +56 -15
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/src/ifcraftcorpus/providers.py +48 -10
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/.gitignore +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/LICENSE +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/LICENSE-CONTENT +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/README.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/agent-design/agent_prompt_engineering.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/agent-design/multi_agent_patterns.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/audience-and-access/accessibility_guidelines.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/audience-and-access/audience_targeting.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/audience-and-access/localization_considerations.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/craft-foundations/audio_visual_integration.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/craft-foundations/collaborative_if_writing.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/craft-foundations/creative_workflow_pipeline.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/craft-foundations/diegetic_design.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/craft-foundations/idea_capture_and_hooks.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/craft-foundations/if_platform_tools.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/craft-foundations/player_analytics_metrics.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/craft-foundations/quality_standards_if.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/craft-foundations/research_and_verification.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/craft-foundations/testing_interactive_fiction.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/emotional-design/conflict_patterns.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/emotional-design/emotional_beats.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/game-design/mechanics_design_patterns.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/genre-conventions/children_and_ya_conventions.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/genre-conventions/fantasy_conventions.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/genre-conventions/historical_fiction.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/genre-conventions/horror_conventions.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/genre-conventions/mystery_conventions.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/genre-conventions/sci_fi_conventions.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/narrative-structure/branching_narrative_construction.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/narrative-structure/branching_narrative_craft.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/narrative-structure/endings_patterns.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/narrative-structure/episodic_serialized_if.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/narrative-structure/nonlinear_structure.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/narrative-structure/pacing_and_tension.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/narrative-structure/romance_and_relationships.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/narrative-structure/scene_structure_and_beats.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/narrative-structure/scene_transitions.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/prose-and-language/character_voice.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/prose-and-language/dialogue_craft.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/prose-and-language/exposition_techniques.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/prose-and-language/narrative_point_of_view.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/prose-and-language/prose_patterns.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/prose-and-language/subtext_and_implication.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/prose-and-language/voice_register_consistency.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/scope-and-planning/scope_and_length.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/world-and-setting/canon_management.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/world-and-setting/setting_as_character.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/world-and-setting/worldbuilding_patterns.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/src/ifcraftcorpus/__init__.py +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/src/ifcraftcorpus/embeddings.py +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/src/ifcraftcorpus/logging_utils.py +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/src/ifcraftcorpus/parser.py +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/src/ifcraftcorpus/py.typed +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/src/ifcraftcorpus/search.py +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/subagents/README.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/subagents/if_genre_consultant.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/subagents/if_platform_advisor.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/subagents/if_prose_writer.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/subagents/if_quality_reviewer.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/subagents/if_story_architect.md +0 -0
- {ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/subagents/if_world_curator.md +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ifcraftcorpus
|
|
3
|
-
Version: 1.2.1
|
|
3
|
+
Version: 1.4.0
|
|
4
4
|
Summary: Interactive fiction craft corpus with search library and MCP server
|
|
5
5
|
Project-URL: Homepage, https://pvliesdonk.github.io/if-craft-corpus
|
|
6
6
|
Project-URL: Repository, https://github.com/pvliesdonk/if-craft-corpus
|
|
@@ -125,7 +125,10 @@ def cmd_embeddings_status(args: argparse.Namespace) -> int:
|
|
|
125
125
|
status = "✓ Available" if available else "✗ Not available"
|
|
126
126
|
print(f"{name:20} {status}")
|
|
127
127
|
if available:
|
|
128
|
-
|
|
128
|
+
extra_info = ""
|
|
129
|
+
if hasattr(provider, "cpu_only") and provider.cpu_only:
|
|
130
|
+
extra_info = " [CPU-only]"
|
|
131
|
+
print(f"{'':20} Model: {provider.model} ({provider.dimension}d){extra_info}")
|
|
129
132
|
|
|
130
133
|
# Auto-detect
|
|
131
134
|
print("\n=== Auto-Detection ===\n")
|
|
@@ -164,9 +167,10 @@ def cmd_embeddings_build(args: argparse.Namespace) -> int:
|
|
|
164
167
|
|
|
165
168
|
# Get provider
|
|
166
169
|
provider: EmbeddingProvider | None = None
|
|
170
|
+
cpu_only = getattr(args, "cpu_only", False)
|
|
167
171
|
if args.provider:
|
|
168
172
|
if args.provider == "ollama":
|
|
169
|
-
provider = OllamaEmbeddings(model=args.model, host=args.ollama_host)
|
|
173
|
+
provider = OllamaEmbeddings(model=args.model, host=args.ollama_host, cpu_only=cpu_only)
|
|
170
174
|
elif args.provider == "openai":
|
|
171
175
|
provider = OpenAIEmbeddings(model=args.model, api_key=args.openai_key)
|
|
172
176
|
elif args.provider in ("sentence-transformers", "st", "local"):
|
|
@@ -175,7 +179,7 @@ def cmd_embeddings_build(args: argparse.Namespace) -> int:
|
|
|
175
179
|
print(f"Unknown provider: {args.provider}", file=sys.stderr)
|
|
176
180
|
return 1
|
|
177
181
|
else:
|
|
178
|
-
provider = get_embedding_provider(model=args.model)
|
|
182
|
+
provider = get_embedding_provider(model=args.model, cpu_only=cpu_only)
|
|
179
183
|
|
|
180
184
|
if not provider:
|
|
181
185
|
print("No embedding provider available.", file=sys.stderr)
|
|
@@ -186,13 +190,19 @@ def cmd_embeddings_build(args: argparse.Namespace) -> int:
|
|
|
186
190
|
print(f"Provider {provider.provider_name} is not available.", file=sys.stderr)
|
|
187
191
|
return 1
|
|
188
192
|
|
|
193
|
+
# Show CPU-only status for Ollama
|
|
194
|
+
cpu_only_status = ""
|
|
195
|
+
if hasattr(provider, "cpu_only") and provider.cpu_only:
|
|
196
|
+
cpu_only_status = " (CPU-only)"
|
|
197
|
+
|
|
189
198
|
logger.info(
|
|
190
|
-
"CLI embeddings build provider=%s model=%s output=%s",
|
|
199
|
+
"CLI embeddings build provider=%s model=%s output=%s cpu_only=%s",
|
|
191
200
|
provider.provider_name,
|
|
192
201
|
provider.model,
|
|
193
202
|
args.output,
|
|
203
|
+
getattr(provider, "cpu_only", False),
|
|
194
204
|
)
|
|
195
|
-
print(f"Using provider: {provider.provider_name}")
|
|
205
|
+
print(f"Using provider: {provider.provider_name}{cpu_only_status}")
|
|
196
206
|
print(f"Model: {provider.model} ({provider.dimension}d)")
|
|
197
207
|
|
|
198
208
|
# Build embeddings
|
|
@@ -308,6 +318,12 @@ def main() -> int:
|
|
|
308
318
|
)
|
|
309
319
|
emb_build.add_argument("--ollama-host", help="Ollama host URL")
|
|
310
320
|
emb_build.add_argument("--openai-key", help="OpenAI API key")
|
|
321
|
+
emb_build.add_argument(
|
|
322
|
+
"--cpu-only",
|
|
323
|
+
action="store_true",
|
|
324
|
+
help="Force CPU-only inference for Ollama (num_gpu=0). "
|
|
325
|
+
"Useful when GPU is under VRAM pressure.",
|
|
326
|
+
)
|
|
311
327
|
emb_build.set_defaults(func=cmd_embeddings_build)
|
|
312
328
|
|
|
313
329
|
args = parser.parse_args()
|
|
@@ -56,21 +56,58 @@ from ifcraftcorpus.parser import Document, parse_directory
|
|
|
56
56
|
def _sanitize_fts_query(query: str) -> str:
|
|
57
57
|
"""Sanitize a query string for the FTS5 MATCH clause.
|
|
58
58
|
|
|
59
|
-
This function replaces
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
59
|
+
This function replaces special characters that could cause FTS5 syntax
|
|
60
|
+
errors with spaces. This is intended to correctly handle natural language
|
|
61
|
+
queries from LLMs and users, for example transforming:
|
|
62
|
+
- "haunted-house" into "haunted house" (hyphen as NOT operator)
|
|
63
|
+
- "dialogue, subtext" into "dialogue subtext" (comma syntax error)
|
|
63
64
|
|
|
64
65
|
It also collapses any resulting multiple spaces into a single space.
|
|
65
66
|
|
|
67
|
+
Note: This is used as a fallback when raw FTS5 queries fail. The search
|
|
68
|
+
method first tries the raw query to support advanced FTS5 syntax, then
|
|
69
|
+
falls back to sanitized query on syntax errors.
|
|
70
|
+
See https://github.com/pvliesdonk/if-craft-corpus/issues/10
|
|
71
|
+
|
|
66
72
|
Args:
|
|
67
73
|
query: Raw query string from user input.
|
|
68
74
|
|
|
69
75
|
Returns:
|
|
70
76
|
Sanitized query safe for FTS5 MATCH.
|
|
71
77
|
"""
|
|
72
|
-
# Replace
|
|
73
|
-
|
|
78
|
+
# Replace problematic characters with spaces:
|
|
79
|
+
# - hyphen: FTS5 interprets as NOT operator
|
|
80
|
+
# - comma: FTS5 column list syntax
|
|
81
|
+
# - parentheses: FTS5 grouping syntax
|
|
82
|
+
# - curly braces: FTS5 column filter syntax
|
|
83
|
+
# - caret: FTS5 position marker
|
|
84
|
+
# - plus: FTS5 column weight
|
|
85
|
+
# - colon after words could affect column queries, but we preserve it
|
|
86
|
+
# to allow intentional column:value syntax
|
|
87
|
+
# Using str.translate is more efficient for replacing multiple single characters.
|
|
88
|
+
translation_table = str.maketrans("-,(){}^+", " " * 8)
|
|
89
|
+
sanitized = query.translate(translation_table)
|
|
90
|
+
# Collapse whitespace
|
|
91
|
+
return " ".join(sanitized.split())
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _is_fts5_query_error(error: sqlite3.OperationalError) -> bool:
|
|
95
|
+
"""Check if an OperationalError is an FTS5 query parsing error.
|
|
96
|
+
|
|
97
|
+
Args:
|
|
98
|
+
error: The SQLite OperationalError to check.
|
|
99
|
+
|
|
100
|
+
Returns:
|
|
101
|
+
True if this is an FTS5 query error that might be recoverable
|
|
102
|
+
by sanitizing the query.
|
|
103
|
+
"""
|
|
104
|
+
msg = str(error).lower()
|
|
105
|
+
# FTS5 syntax errors (e.g., "fts5: syntax error near ','")
|
|
106
|
+
if "fts5" in msg and "syntax error" in msg:
|
|
107
|
+
return True
|
|
108
|
+
# Column errors from FTS5 query parsing (e.g., "no such column: voice")
|
|
109
|
+
# This can happen when hyphens are interpreted as column filters
|
|
110
|
+
return "no such column" in msg
|
|
74
111
|
|
|
75
112
|
|
|
76
113
|
@dataclass
|
|
@@ -359,51 +396,25 @@ class CorpusIndex:
|
|
|
359
396
|
self.add_document(doc)
|
|
360
397
|
return len(documents)
|
|
361
398
|
|
|
362
|
-
def search(
|
|
399
|
+
def _execute_fts_query(
|
|
363
400
|
self,
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
limit: int = 10,
|
|
401
|
+
fts_query: str,
|
|
402
|
+
cluster: str | None,
|
|
403
|
+
limit: int,
|
|
368
404
|
) -> list[SearchResult]:
|
|
369
|
-
"""Search the corpus using FTS5 full-text search.
|
|
370
|
-
|
|
371
|
-
Performs a keyword search using SQLite FTS5 with BM25 ranking.
|
|
372
|
-
Supports the full FTS5 query syntax for advanced searches.
|
|
405
|
+
"""Execute an FTS5 query and return results.
|
|
373
406
|
|
|
374
407
|
Args:
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
- Phrases: ``"character voice"``
|
|
379
|
-
- Boolean: ``tension AND suspense``, ``horror NOT comedy``
|
|
380
|
-
- Prefix: ``narrat*``
|
|
381
|
-
- Column-specific: ``title:craft``
|
|
382
|
-
|
|
383
|
-
cluster: Optional cluster name to filter results. Only returns
|
|
384
|
-
matches from the specified cluster.
|
|
385
|
-
limit: Maximum number of results to return. Default 10.
|
|
408
|
+
fts_query: The FTS5 query string.
|
|
409
|
+
cluster: Optional cluster filter.
|
|
410
|
+
limit: Maximum results to return.
|
|
386
411
|
|
|
387
412
|
Returns:
|
|
388
|
-
List of :class:`SearchResult` objects, sorted by BM25 relevance
|
|
389
|
-
score in descending order (best matches first).
|
|
413
|
+
List of SearchResult objects.
|
|
390
414
|
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
>>> results = index.search("dialogue")
|
|
394
|
-
|
|
395
|
-
>>> # Phrase search
|
|
396
|
-
>>> results = index.search('"character voice"')
|
|
397
|
-
|
|
398
|
-
>>> # Boolean with cluster filter
|
|
399
|
-
>>> results = index.search("tension OR suspense",
|
|
400
|
-
... cluster="emotional-design",
|
|
401
|
-
... limit=5)
|
|
415
|
+
Raises:
|
|
416
|
+
sqlite3.OperationalError: If the query has invalid FTS5 syntax.
|
|
402
417
|
"""
|
|
403
|
-
# Build FTS5 query - sanitize to handle special characters
|
|
404
|
-
fts_query = _sanitize_fts_query(query)
|
|
405
|
-
|
|
406
|
-
# Add cluster filter if specified
|
|
407
418
|
where_clause = ""
|
|
408
419
|
params: list[str | int] = [fts_query]
|
|
409
420
|
if cluster:
|
|
@@ -447,6 +458,72 @@ class CorpusIndex:
|
|
|
447
458
|
|
|
448
459
|
return results
|
|
449
460
|
|
|
461
|
+
def search(
|
|
462
|
+
self,
|
|
463
|
+
query: str,
|
|
464
|
+
*,
|
|
465
|
+
cluster: str | None = None,
|
|
466
|
+
limit: int = 10,
|
|
467
|
+
) -> list[SearchResult]:
|
|
468
|
+
"""Search the corpus using FTS5 full-text search.
|
|
469
|
+
|
|
470
|
+
Performs a keyword search using SQLite FTS5 with BM25 ranking.
|
|
471
|
+
Supports the full FTS5 query syntax for advanced searches.
|
|
472
|
+
|
|
473
|
+
The search first attempts to execute the query as-is to support
|
|
474
|
+
advanced FTS5 syntax. If that fails with a syntax error, it falls
|
|
475
|
+
back to a sanitized version of the query that treats special
|
|
476
|
+
characters as word separators.
|
|
477
|
+
|
|
478
|
+
Args:
|
|
479
|
+
query: Search query. Supports FTS5 syntax:
|
|
480
|
+
|
|
481
|
+
- Simple keywords: ``dialogue``
|
|
482
|
+
- Phrases: ``"character voice"``
|
|
483
|
+
- Boolean: ``tension AND suspense``, ``horror NOT comedy``
|
|
484
|
+
- Prefix: ``narrat*``
|
|
485
|
+
- Column-specific: ``title:craft``
|
|
486
|
+
|
|
487
|
+
Natural language queries with punctuation (e.g., "dialogue,
|
|
488
|
+
subtext") are also supported - they will be automatically
|
|
489
|
+
sanitized if they cause syntax errors.
|
|
490
|
+
|
|
491
|
+
cluster: Optional cluster name to filter results. Only returns
|
|
492
|
+
matches from the specified cluster.
|
|
493
|
+
limit: Maximum number of results to return. Default 10.
|
|
494
|
+
|
|
495
|
+
Returns:
|
|
496
|
+
List of :class:`SearchResult` objects, sorted by BM25 relevance
|
|
497
|
+
score in descending order (best matches first).
|
|
498
|
+
|
|
499
|
+
Example:
|
|
500
|
+
>>> # Simple search
|
|
501
|
+
>>> results = index.search("dialogue")
|
|
502
|
+
|
|
503
|
+
>>> # Phrase search
|
|
504
|
+
>>> results = index.search('"character voice"')
|
|
505
|
+
|
|
506
|
+
>>> # Boolean with cluster filter
|
|
507
|
+
>>> results = index.search("tension OR suspense",
|
|
508
|
+
... cluster="emotional-design",
|
|
509
|
+
... limit=5)
|
|
510
|
+
|
|
511
|
+
>>> # Natural language (auto-sanitized)
|
|
512
|
+
>>> results = index.search("dialogue, subtext")
|
|
513
|
+
"""
|
|
514
|
+
# Try raw query first to support advanced FTS5 syntax
|
|
515
|
+
try:
|
|
516
|
+
return self._execute_fts_query(query, cluster, limit)
|
|
517
|
+
except sqlite3.OperationalError as e:
|
|
518
|
+
if not _is_fts5_query_error(e):
|
|
519
|
+
raise
|
|
520
|
+
|
|
521
|
+
# Fallback to sanitized query for natural language input
|
|
522
|
+
sanitized = _sanitize_fts_query(query)
|
|
523
|
+
if not sanitized:
|
|
524
|
+
return []
|
|
525
|
+
return self._execute_fts_query(sanitized, cluster, limit)
|
|
526
|
+
|
|
450
527
|
def list_documents(self) -> list[dict[str, str]]:
|
|
451
528
|
"""List all indexed documents with their metadata.
|
|
452
529
|
|
|
@@ -194,9 +194,24 @@ def search_corpus(
|
|
|
194
194
|
worldbuilding, and genre conventions.
|
|
195
195
|
|
|
196
196
|
Args:
|
|
197
|
-
query: Search query
|
|
198
|
-
|
|
199
|
-
|
|
197
|
+
query: Search query. Supports natural language or FTS5 syntax:
|
|
198
|
+
|
|
199
|
+
Natural language examples:
|
|
200
|
+
- "dialogue subtext"
|
|
201
|
+
- "branching narrative"
|
|
202
|
+
- "pacing action scenes"
|
|
203
|
+
|
|
204
|
+
FTS5 advanced syntax:
|
|
205
|
+
- Exact phrases: '"character voice"'
|
|
206
|
+
- Boolean NOT: "dialogue NOT comedy"
|
|
207
|
+
- Boolean OR: "tension OR suspense"
|
|
208
|
+
- Boolean AND: "dialogue AND subtext"
|
|
209
|
+
- Prefix search: "narrat*"
|
|
210
|
+
- Column filter: "title:craft", "cluster:genre-conventions"
|
|
211
|
+
|
|
212
|
+
Natural language queries with punctuation are automatically
|
|
213
|
+
sanitized, so both styles work seamlessly.
|
|
214
|
+
|
|
200
215
|
cluster: Optional topic cluster to filter by. Valid clusters:
|
|
201
216
|
narrative-structure, prose-and-language, genre-conventions,
|
|
202
217
|
audience-and-access, world-and-setting, emotional-design,
|
|
@@ -367,11 +382,15 @@ def embeddings_status() -> dict[str, Any]:
|
|
|
367
382
|
try:
|
|
368
383
|
provider = cls()
|
|
369
384
|
available = provider.check_availability()
|
|
370
|
-
|
|
385
|
+
provider_info: dict[str, Any] = {
|
|
371
386
|
"available": available,
|
|
372
387
|
"model": provider.model if available else None,
|
|
373
388
|
"dimension": provider.dimension if available else None,
|
|
374
389
|
}
|
|
390
|
+
# Add cpu_only info for Ollama
|
|
391
|
+
if hasattr(provider, "cpu_only"):
|
|
392
|
+
provider_info["cpu_only"] = provider.cpu_only
|
|
393
|
+
result["providers"][name] = provider_info
|
|
375
394
|
except Exception as exc: # pragma: no cover - defensive logging
|
|
376
395
|
logger.warning("Failed to inspect embedding provider %s: %s", name, exc)
|
|
377
396
|
result["providers"][name] = {"available": False, "error": "import_failed"}
|
|
@@ -409,6 +428,8 @@ def embeddings_status() -> dict[str, Any]:
|
|
|
409
428
|
@tool
|
|
410
429
|
def build_embeddings(
|
|
411
430
|
provider: str | None = None,
|
|
431
|
+
model: str | None = None,
|
|
432
|
+
cpu_only: bool | None = None,
|
|
412
433
|
force: bool = False,
|
|
413
434
|
) -> dict[str, Any]:
|
|
414
435
|
"""Build or rebuild the embedding index for semantic search.
|
|
@@ -420,6 +441,11 @@ def build_embeddings(
|
|
|
420
441
|
provider: Embedding provider to use: "ollama", "openai", or
|
|
421
442
|
"sentence_transformers". If None, auto-detects the best
|
|
422
443
|
available provider.
|
|
444
|
+
model: Embedding model name override. If None, uses provider default
|
|
445
|
+
or OLLAMA_MODEL env var for Ollama.
|
|
446
|
+
cpu_only: For Ollama provider, force CPU-only inference (num_gpu=0).
|
|
447
|
+
Useful when GPU is under VRAM pressure. If None, reads from
|
|
448
|
+
OLLAMA_CPU_ONLY env var.
|
|
423
449
|
force: If True, rebuild even if embeddings already exist.
|
|
424
450
|
|
|
425
451
|
Returns:
|
|
@@ -427,15 +453,23 @@ def build_embeddings(
|
|
|
427
453
|
|
|
428
454
|
Note:
|
|
429
455
|
Ollama requires a running Ollama server (configure with OLLAMA_HOST env).
|
|
456
|
+
Set OLLAMA_CPU_ONLY=true to force CPU inference.
|
|
430
457
|
OpenAI requires OPENAI_API_KEY environment variable.
|
|
431
458
|
SentenceTransformers requires the sentence-transformers package.
|
|
432
459
|
"""
|
|
433
460
|
global _corpus
|
|
434
461
|
|
|
435
|
-
logger.info(
|
|
462
|
+
logger.info(
|
|
463
|
+
"build_embeddings requested provider=%s model=%s cpu_only=%s force=%s",
|
|
464
|
+
provider,
|
|
465
|
+
model,
|
|
466
|
+
cpu_only,
|
|
467
|
+
force,
|
|
468
|
+
)
|
|
436
469
|
|
|
437
470
|
try:
|
|
438
471
|
from ifcraftcorpus.providers import (
|
|
472
|
+
EmbeddingProvider,
|
|
439
473
|
OllamaEmbeddings,
|
|
440
474
|
OpenAIEmbeddings,
|
|
441
475
|
SentenceTransformersEmbeddings,
|
|
@@ -449,21 +483,22 @@ def build_embeddings(
|
|
|
449
483
|
}
|
|
450
484
|
|
|
451
485
|
# Get provider
|
|
452
|
-
embedding_provider = None
|
|
486
|
+
embedding_provider: EmbeddingProvider | None = None
|
|
453
487
|
if provider:
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
488
|
+
provider_lower = provider.lower()
|
|
489
|
+
if provider_lower == "ollama":
|
|
490
|
+
embedding_provider = OllamaEmbeddings(model=model, cpu_only=cpu_only)
|
|
491
|
+
elif provider_lower == "openai":
|
|
492
|
+
embedding_provider = OpenAIEmbeddings(model=model)
|
|
493
|
+
elif provider_lower == "sentence_transformers":
|
|
494
|
+
embedding_provider = SentenceTransformersEmbeddings(model=model)
|
|
495
|
+
else:
|
|
460
496
|
logger.warning("Unknown embeddings provider requested: %s", provider)
|
|
461
497
|
return {
|
|
462
498
|
"error": f"Unknown provider: {provider}. Use: ollama, openai, sentence_transformers"
|
|
463
499
|
}
|
|
464
|
-
embedding_provider = provider_map[provider]()
|
|
465
500
|
else:
|
|
466
|
-
embedding_provider = get_embedding_provider()
|
|
501
|
+
embedding_provider = get_embedding_provider(model=model, cpu_only=cpu_only)
|
|
467
502
|
|
|
468
503
|
if not embedding_provider:
|
|
469
504
|
logger.warning("No embedding provider available for build request")
|
|
@@ -506,7 +541,7 @@ def build_embeddings(
|
|
|
506
541
|
# Update global corpus to use new embeddings
|
|
507
542
|
_corpus = Corpus(embeddings_path=embeddings_path)
|
|
508
543
|
|
|
509
|
-
|
|
544
|
+
result = {
|
|
510
545
|
"status": "success",
|
|
511
546
|
"items_embedded": count,
|
|
512
547
|
"provider": embedding_provider.provider_name,
|
|
@@ -514,6 +549,12 @@ def build_embeddings(
|
|
|
514
549
|
"path": str(embeddings_path),
|
|
515
550
|
}
|
|
516
551
|
|
|
552
|
+
# Add cpu_only info for Ollama
|
|
553
|
+
if hasattr(embedding_provider, "cpu_only"):
|
|
554
|
+
result["cpu_only"] = embedding_provider.cpu_only
|
|
555
|
+
|
|
556
|
+
return result
|
|
557
|
+
|
|
517
558
|
|
|
518
559
|
# =============================================================================
|
|
519
560
|
# Subagent Prompts
|
|
@@ -9,7 +9,12 @@ Supports multiple backends:
|
|
|
9
9
|
Provider selection via environment:
|
|
10
10
|
- EMBEDDING_PROVIDER: "ollama", "openai", or "sentence-transformers"
|
|
11
11
|
- OLLAMA_HOST: Ollama server URL (default: http://localhost:11434)
|
|
12
|
+
- IFCRAFTCORPUS_OLLAMA_MODEL: Ollama embedding model (default: nomic-embed-text)
|
|
13
|
+
- IFCRAFTCORPUS_OLLAMA_CPU_ONLY: Set to "true" or "1" to force CPU-only inference
|
|
12
14
|
- OPENAI_API_KEY: OpenAI API key (required for openai provider)
|
|
15
|
+
|
|
16
|
+
Note: The IFCRAFTCORPUS_ prefix avoids conflicts with other applications
|
|
17
|
+
that may use Ollama with different model/GPU configurations.
|
|
13
18
|
"""
|
|
14
19
|
|
|
15
20
|
from __future__ import annotations
|
|
@@ -113,24 +118,44 @@ class OllamaEmbeddings(EmbeddingProvider):
|
|
|
113
118
|
Recommended model: nomic-embed-text (768 dimensions)
|
|
114
119
|
|
|
115
120
|
Requires: httpx (pip install httpx)
|
|
121
|
+
|
|
122
|
+
Environment variables:
|
|
123
|
+
OLLAMA_HOST: Server URL (default: http://localhost:11434)
|
|
124
|
+
IFCRAFTCORPUS_OLLAMA_MODEL: Model name (default: nomic-embed-text)
|
|
125
|
+
IFCRAFTCORPUS_OLLAMA_CPU_ONLY: Set to "true" or "1" to force CPU-only inference
|
|
126
|
+
|
|
127
|
+
Note: The IFCRAFTCORPUS_ prefix avoids conflicts with other applications
|
|
128
|
+
that may use Ollama with different model/GPU configurations.
|
|
116
129
|
"""
|
|
117
130
|
|
|
118
131
|
def __init__(
|
|
119
132
|
self,
|
|
120
133
|
model: str | None = None,
|
|
121
134
|
host: str | None = None,
|
|
135
|
+
cpu_only: bool | None = None,
|
|
122
136
|
):
|
|
123
137
|
"""
|
|
124
138
|
Initialize Ollama embeddings.
|
|
125
139
|
|
|
126
140
|
Args:
|
|
127
|
-
model: Embedding model name (default: nomic-embed-text)
|
|
128
|
-
|
|
141
|
+
model: Embedding model name (default: nomic-embed-text, or
|
|
142
|
+
IFCRAFTCORPUS_OLLAMA_MODEL env)
|
|
143
|
+
host: Ollama host URL (default: http://localhost:11434, or OLLAMA_HOST env)
|
|
144
|
+
cpu_only: Force CPU-only inference with num_gpu=0 (default: False, or
|
|
145
|
+
IFCRAFTCORPUS_OLLAMA_CPU_ONLY env). Useful when GPU is under
|
|
146
|
+
VRAM pressure or to avoid contention with other GPU workloads.
|
|
129
147
|
"""
|
|
130
|
-
self._model = model or DEFAULT_MODELS["ollama"]
|
|
148
|
+
self._model = model or os.getenv("IFCRAFTCORPUS_OLLAMA_MODEL") or DEFAULT_MODELS["ollama"]
|
|
131
149
|
self._host = host or os.getenv("OLLAMA_HOST", "http://localhost:11434")
|
|
132
150
|
self._dimension = MODEL_DIMENSIONS.get(self._model, 768)
|
|
133
151
|
|
|
152
|
+
# CPU-only mode: check parameter, then env var
|
|
153
|
+
if cpu_only is not None:
|
|
154
|
+
self._cpu_only = cpu_only
|
|
155
|
+
else:
|
|
156
|
+
env_val = os.getenv("IFCRAFTCORPUS_OLLAMA_CPU_ONLY", "").lower()
|
|
157
|
+
self._cpu_only = env_val in ("true", "1", "yes")
|
|
158
|
+
|
|
134
159
|
@property
|
|
135
160
|
def model(self) -> str:
|
|
136
161
|
return self._model
|
|
@@ -143,20 +168,30 @@ class OllamaEmbeddings(EmbeddingProvider):
|
|
|
143
168
|
def provider_name(self) -> str:
|
|
144
169
|
return "ollama"
|
|
145
170
|
|
|
171
|
+
@property
|
|
172
|
+
def cpu_only(self) -> bool:
|
|
173
|
+
"""Whether CPU-only mode is enabled."""
|
|
174
|
+
return self._cpu_only
|
|
175
|
+
|
|
146
176
|
def embed(self, texts: list[str]) -> EmbeddingResult:
|
|
147
177
|
"""Embed texts using Ollama."""
|
|
148
178
|
import httpx
|
|
149
179
|
|
|
150
180
|
embeddings: list[list[float]] = []
|
|
151
181
|
|
|
182
|
+
# Build request payload
|
|
183
|
+
base_payload: dict[str, object] = {"model": self._model}
|
|
184
|
+
|
|
185
|
+
# Add options for CPU-only mode
|
|
186
|
+
if self._cpu_only:
|
|
187
|
+
base_payload["options"] = {"num_gpu": 0}
|
|
188
|
+
|
|
152
189
|
with httpx.Client(timeout=60.0) as client:
|
|
153
190
|
for text in texts:
|
|
191
|
+
payload = {**base_payload, "prompt": text}
|
|
154
192
|
response = client.post(
|
|
155
193
|
f"{self._host}/api/embeddings",
|
|
156
|
-
json={
|
|
157
|
-
"model": self._model,
|
|
158
|
-
"prompt": text,
|
|
159
|
-
},
|
|
194
|
+
json=payload,
|
|
160
195
|
)
|
|
161
196
|
response.raise_for_status()
|
|
162
197
|
data = response.json()
|
|
@@ -372,6 +407,7 @@ class SentenceTransformersEmbeddings(EmbeddingProvider):
|
|
|
372
407
|
def get_embedding_provider(
|
|
373
408
|
provider_name: str | None = None,
|
|
374
409
|
model: str | None = None,
|
|
410
|
+
cpu_only: bool | None = None,
|
|
375
411
|
) -> EmbeddingProvider | None:
|
|
376
412
|
"""
|
|
377
413
|
Get an embedding provider based on configuration.
|
|
@@ -384,6 +420,8 @@ def get_embedding_provider(
|
|
|
384
420
|
Args:
|
|
385
421
|
provider_name: Explicit provider name ("ollama", "openai", "sentence-transformers")
|
|
386
422
|
model: Optional model override
|
|
423
|
+
cpu_only: For Ollama, force CPU-only inference (num_gpu=0). If None, reads
|
|
424
|
+
from OLLAMA_CPU_ONLY env var.
|
|
387
425
|
|
|
388
426
|
Returns:
|
|
389
427
|
Configured EmbeddingProvider or None if none available
|
|
@@ -395,7 +433,7 @@ def get_embedding_provider(
|
|
|
395
433
|
name = name.lower()
|
|
396
434
|
provider: EmbeddingProvider
|
|
397
435
|
if name == "ollama":
|
|
398
|
-
provider = OllamaEmbeddings(model=model)
|
|
436
|
+
provider = OllamaEmbeddings(model=model, cpu_only=cpu_only)
|
|
399
437
|
elif name == "openai":
|
|
400
438
|
provider = OpenAIEmbeddings(model=model)
|
|
401
439
|
elif name in ("sentence-transformers", "st", "local"):
|
|
@@ -410,9 +448,9 @@ def get_embedding_provider(
|
|
|
410
448
|
return None
|
|
411
449
|
|
|
412
450
|
# Auto-detect: try Ollama first, then OpenAI, then SentenceTransformers
|
|
413
|
-
ollama = OllamaEmbeddings(model=model)
|
|
451
|
+
ollama = OllamaEmbeddings(model=model, cpu_only=cpu_only)
|
|
414
452
|
if ollama.check_availability():
|
|
415
|
-
logger.info(f"Using Ollama embeddings ({ollama.model})")
|
|
453
|
+
logger.info(f"Using Ollama embeddings ({ollama.model}, cpu_only={ollama.cpu_only})")
|
|
416
454
|
return ollama
|
|
417
455
|
|
|
418
456
|
openai = OpenAIEmbeddings(model=model)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/audience-and-access/accessibility_guidelines.md
RENAMED
|
File without changes
|
{ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/audience-and-access/audience_targeting.md
RENAMED
|
File without changes
|
|
File without changes
|
{ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/craft-foundations/audio_visual_integration.md
RENAMED
|
File without changes
|
{ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/craft-foundations/collaborative_if_writing.md
RENAMED
|
File without changes
|
{ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/craft-foundations/creative_workflow_pipeline.md
RENAMED
|
File without changes
|
|
File without changes
|
{ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/craft-foundations/idea_capture_and_hooks.md
RENAMED
|
File without changes
|
|
File without changes
|
{ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/craft-foundations/player_analytics_metrics.md
RENAMED
|
File without changes
|
{ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/craft-foundations/quality_standards_if.md
RENAMED
|
File without changes
|
{ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/craft-foundations/research_and_verification.md
RENAMED
|
File without changes
|
{ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/craft-foundations/testing_interactive_fiction.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/genre-conventions/children_and_ya_conventions.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/narrative-structure/branching_narrative_craft.md
RENAMED
|
File without changes
|
|
File without changes
|
{ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/narrative-structure/episodic_serialized_if.md
RENAMED
|
File without changes
|
{ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/narrative-structure/nonlinear_structure.md
RENAMED
|
File without changes
|
{ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/narrative-structure/pacing_and_tension.md
RENAMED
|
File without changes
|
{ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/narrative-structure/romance_and_relationships.md
RENAMED
|
File without changes
|
{ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/narrative-structure/scene_structure_and_beats.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/prose-and-language/exposition_techniques.md
RENAMED
|
File without changes
|
{ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/prose-and-language/narrative_point_of_view.md
RENAMED
|
File without changes
|
|
File without changes
|
{ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/prose-and-language/subtext_and_implication.md
RENAMED
|
File without changes
|
{ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/prose-and-language/voice_register_consistency.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/world-and-setting/setting_as_character.md
RENAMED
|
File without changes
|
{ifcraftcorpus-1.2.1 → ifcraftcorpus-1.4.0}/corpus/world-and-setting/worldbuilding_patterns.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|