ifcraftcorpus 1.2.0__tar.gz → 1.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/PKG-INFO +18 -1
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/README.md +17 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/pyproject.toml +12 -1
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/src/ifcraftcorpus/cli.py +67 -7
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/src/ifcraftcorpus/index.py +22 -2
- ifcraftcorpus-1.3.0/src/ifcraftcorpus/logging_utils.py +84 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/src/ifcraftcorpus/mcp_server.py +148 -32
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/src/ifcraftcorpus/providers.py +49 -11
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/src/ifcraftcorpus/search.py +47 -3
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/.gitignore +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/LICENSE +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/LICENSE-CONTENT +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/agent-design/agent_prompt_engineering.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/agent-design/multi_agent_patterns.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/audience-and-access/accessibility_guidelines.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/audience-and-access/audience_targeting.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/audience-and-access/localization_considerations.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/craft-foundations/audio_visual_integration.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/craft-foundations/collaborative_if_writing.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/craft-foundations/creative_workflow_pipeline.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/craft-foundations/diegetic_design.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/craft-foundations/idea_capture_and_hooks.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/craft-foundations/if_platform_tools.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/craft-foundations/player_analytics_metrics.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/craft-foundations/quality_standards_if.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/craft-foundations/research_and_verification.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/craft-foundations/testing_interactive_fiction.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/emotional-design/conflict_patterns.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/emotional-design/emotional_beats.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/game-design/mechanics_design_patterns.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/genre-conventions/children_and_ya_conventions.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/genre-conventions/fantasy_conventions.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/genre-conventions/historical_fiction.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/genre-conventions/horror_conventions.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/genre-conventions/mystery_conventions.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/genre-conventions/sci_fi_conventions.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/narrative-structure/branching_narrative_construction.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/narrative-structure/branching_narrative_craft.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/narrative-structure/endings_patterns.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/narrative-structure/episodic_serialized_if.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/narrative-structure/nonlinear_structure.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/narrative-structure/pacing_and_tension.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/narrative-structure/romance_and_relationships.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/narrative-structure/scene_structure_and_beats.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/narrative-structure/scene_transitions.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/prose-and-language/character_voice.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/prose-and-language/dialogue_craft.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/prose-and-language/exposition_techniques.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/prose-and-language/narrative_point_of_view.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/prose-and-language/prose_patterns.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/prose-and-language/subtext_and_implication.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/prose-and-language/voice_register_consistency.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/scope-and-planning/scope_and_length.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/world-and-setting/canon_management.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/world-and-setting/setting_as_character.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/corpus/world-and-setting/worldbuilding_patterns.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/src/ifcraftcorpus/__init__.py +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/src/ifcraftcorpus/embeddings.py +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/src/ifcraftcorpus/parser.py +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/src/ifcraftcorpus/py.typed +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/subagents/README.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/subagents/if_genre_consultant.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/subagents/if_platform_advisor.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/subagents/if_prose_writer.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/subagents/if_quality_reviewer.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/subagents/if_story_architect.md +0 -0
- {ifcraftcorpus-1.2.0 → ifcraftcorpus-1.3.0}/subagents/if_world_curator.md +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ifcraftcorpus
|
|
3
|
-
Version: 1.2.0
|
|
3
|
+
Version: 1.3.0
|
|
4
4
|
Summary: Interactive fiction craft corpus with search library and MCP server
|
|
5
5
|
Project-URL: Homepage, https://pvliesdonk.github.io/if-craft-corpus
|
|
6
6
|
Project-URL: Repository, https://github.com/pvliesdonk/if-craft-corpus
|
|
@@ -124,6 +124,23 @@ results = corpus.search(
|
|
|
124
124
|
| agent-design | 2 | Multi-agent patterns, prompt engineering |
|
|
125
125
|
| game-design | 1 | Mechanics design patterns |
|
|
126
126
|
|
|
127
|
+
## Verbose Logging
|
|
128
|
+
|
|
129
|
+
Set `LOG_LEVEL` (e.g., `INFO`, `DEBUG`) or the convenience flag `VERBOSE=1`
|
|
130
|
+
before launching `ifcraftcorpus`, `ifcraftcorpus-mcp`, or the Docker image to
|
|
131
|
+
emit detailed logs to stderr. Example:
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
LOG_LEVEL=DEBUG ifcraftcorpus-mcp
|
|
135
|
+
|
|
136
|
+
# Docker
|
|
137
|
+
docker run -p 8000:8000 \
|
|
138
|
+
-e LOG_LEVEL=DEBUG \
|
|
139
|
+
ghcr.io/pvliesdonk/if-craft-corpus
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
Logs never touch stdout, so stdio transports remain compatible.
|
|
143
|
+
|
|
127
144
|
## Documentation
|
|
128
145
|
|
|
129
146
|
Full documentation: https://pvliesdonk.github.io/if-craft-corpus
|
|
@@ -71,6 +71,23 @@ results = corpus.search(
|
|
|
71
71
|
| agent-design | 2 | Multi-agent patterns, prompt engineering |
|
|
72
72
|
| game-design | 1 | Mechanics design patterns |
|
|
73
73
|
|
|
74
|
+
## Verbose Logging
|
|
75
|
+
|
|
76
|
+
Set `LOG_LEVEL` (e.g., `INFO`, `DEBUG`) or the convenience flag `VERBOSE=1`
|
|
77
|
+
before launching `ifcraftcorpus`, `ifcraftcorpus-mcp`, or the Docker image to
|
|
78
|
+
emit detailed logs to stderr. Example:
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
LOG_LEVEL=DEBUG ifcraftcorpus-mcp
|
|
82
|
+
|
|
83
|
+
# Docker
|
|
84
|
+
docker run -p 8000:8000 \
|
|
85
|
+
-e LOG_LEVEL=DEBUG \
|
|
86
|
+
ghcr.io/pvliesdonk/if-craft-corpus
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Logs never touch stdout, so stdio transports remain compatible.
|
|
90
|
+
|
|
74
91
|
## Documentation
|
|
75
92
|
|
|
76
93
|
Full documentation: https://pvliesdonk.github.io/if-craft-corpus
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "ifcraftcorpus"
|
|
3
|
-
version = "1.2.0"
|
|
3
|
+
version = "1.3.0"
|
|
4
4
|
description = "Interactive fiction craft corpus with search library and MCP server"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license = {text = "MIT"}
|
|
@@ -103,6 +103,17 @@ strict = true
|
|
|
103
103
|
warn_return_any = true
|
|
104
104
|
warn_unused_ignores = true
|
|
105
105
|
|
|
106
|
+
[[tool.mypy.overrides]]
|
|
107
|
+
module = [
|
|
108
|
+
"fastmcp",
|
|
109
|
+
"fastmcp.prompts",
|
|
110
|
+
"mcp.*",
|
|
111
|
+
"sentence_transformers",
|
|
112
|
+
"numpy",
|
|
113
|
+
"httpx",
|
|
114
|
+
]
|
|
115
|
+
ignore_missing_imports = true
|
|
116
|
+
|
|
106
117
|
[tool.pytest.ini_options]
|
|
107
118
|
testpaths = ["tests"]
|
|
108
119
|
addopts = "-v --tb=short"
|
|
@@ -17,6 +17,7 @@ from __future__ import annotations
|
|
|
17
17
|
|
|
18
18
|
import argparse
|
|
19
19
|
import json
|
|
20
|
+
import logging
|
|
20
21
|
import sys
|
|
21
22
|
from pathlib import Path
|
|
22
23
|
from typing import TYPE_CHECKING
|
|
@@ -24,18 +25,38 @@ from typing import TYPE_CHECKING
|
|
|
24
25
|
if TYPE_CHECKING:
|
|
25
26
|
from ifcraftcorpus.providers import EmbeddingProvider
|
|
26
27
|
|
|
28
|
+
from ifcraftcorpus.logging_utils import configure_logging
|
|
29
|
+
|
|
30
|
+
configure_logging()
|
|
31
|
+
logger = logging.getLogger(__name__)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _truncate(value: str, limit: int = 120) -> str:
|
|
35
|
+
"""Shorten long log values to keep CLI logs readable."""
|
|
36
|
+
|
|
37
|
+
if len(value) <= limit:
|
|
38
|
+
return value
|
|
39
|
+
return f"{value[:limit]}..."
|
|
40
|
+
|
|
27
41
|
|
|
28
42
|
def cmd_info(args: argparse.Namespace) -> int:
|
|
29
43
|
"""Show corpus information."""
|
|
30
44
|
from ifcraftcorpus import Corpus, __version__
|
|
31
45
|
|
|
32
46
|
corpus = Corpus()
|
|
47
|
+
clusters = corpus.list_clusters()
|
|
48
|
+
logger.info(
|
|
49
|
+
"CLI info command: version=%s docs=%s clusters=%s",
|
|
50
|
+
__version__,
|
|
51
|
+
corpus.document_count(),
|
|
52
|
+
len(clusters),
|
|
53
|
+
)
|
|
33
54
|
|
|
34
55
|
print(f"\nIF Craft Corpus v{__version__}")
|
|
35
56
|
print(f"Documents: {corpus.document_count()}")
|
|
36
|
-
print(f"Clusters: {len(corpus.list_clusters())}")
|
|
57
|
+
print(f"Clusters: {len(clusters)}")
|
|
37
58
|
print("\nClusters:")
|
|
38
|
-
for cluster in corpus.list_clusters():
|
|
59
|
+
for cluster in clusters:
|
|
39
60
|
docs = [d for d in corpus.list_documents() if d["cluster"] == cluster]
|
|
40
61
|
print(f" {cluster}: {len(docs)} file(s)")
|
|
41
62
|
|
|
@@ -47,6 +68,12 @@ def cmd_search(args: argparse.Namespace) -> int:
|
|
|
47
68
|
from ifcraftcorpus import Corpus
|
|
48
69
|
|
|
49
70
|
corpus = Corpus()
|
|
71
|
+
logger.info(
|
|
72
|
+
"CLI search query=%r cluster=%s limit=%s",
|
|
73
|
+
_truncate(args.query),
|
|
74
|
+
args.cluster,
|
|
75
|
+
args.limit,
|
|
76
|
+
)
|
|
50
77
|
results = corpus.search(
|
|
51
78
|
args.query,
|
|
52
79
|
limit=args.limit,
|
|
@@ -55,6 +82,7 @@ def cmd_search(args: argparse.Namespace) -> int:
|
|
|
55
82
|
)
|
|
56
83
|
|
|
57
84
|
if not results:
|
|
85
|
+
logger.info("CLI search returned no matches")
|
|
58
86
|
print("No results found.")
|
|
59
87
|
return 0
|
|
60
88
|
|
|
@@ -69,6 +97,7 @@ def cmd_search(args: argparse.Namespace) -> int:
|
|
|
69
97
|
content += "..."
|
|
70
98
|
print(f" {content}")
|
|
71
99
|
|
|
100
|
+
logger.info("CLI search returned %s results", len(results))
|
|
72
101
|
return 0
|
|
73
102
|
|
|
74
103
|
|
|
@@ -81,6 +110,7 @@ def cmd_embeddings_status(args: argparse.Namespace) -> int:
|
|
|
81
110
|
get_embedding_provider,
|
|
82
111
|
)
|
|
83
112
|
|
|
113
|
+
logger.debug("CLI embeddings status requested")
|
|
84
114
|
print("\n=== Embedding Providers ===\n")
|
|
85
115
|
|
|
86
116
|
# Check each provider
|
|
@@ -95,7 +125,10 @@ def cmd_embeddings_status(args: argparse.Namespace) -> int:
|
|
|
95
125
|
status = "✓ Available" if available else "✗ Not available"
|
|
96
126
|
print(f"{name:20} {status}")
|
|
97
127
|
if available:
|
|
98
|
-
|
|
128
|
+
extra_info = ""
|
|
129
|
+
if hasattr(provider, "cpu_only") and provider.cpu_only:
|
|
130
|
+
extra_info = " [CPU-only]"
|
|
131
|
+
print(f"{'':20} Model: {provider.model} ({provider.dimension}d){extra_info}")
|
|
99
132
|
|
|
100
133
|
# Auto-detect
|
|
101
134
|
print("\n=== Auto-Detection ===\n")
|
|
@@ -134,9 +167,10 @@ def cmd_embeddings_build(args: argparse.Namespace) -> int:
|
|
|
134
167
|
|
|
135
168
|
# Get provider
|
|
136
169
|
provider: EmbeddingProvider | None = None
|
|
170
|
+
cpu_only = getattr(args, "cpu_only", False)
|
|
137
171
|
if args.provider:
|
|
138
172
|
if args.provider == "ollama":
|
|
139
|
-
provider = OllamaEmbeddings(model=args.model, host=args.ollama_host)
|
|
173
|
+
provider = OllamaEmbeddings(model=args.model, host=args.ollama_host, cpu_only=cpu_only)
|
|
140
174
|
elif args.provider == "openai":
|
|
141
175
|
provider = OpenAIEmbeddings(model=args.model, api_key=args.openai_key)
|
|
142
176
|
elif args.provider in ("sentence-transformers", "st", "local"):
|
|
@@ -145,7 +179,7 @@ def cmd_embeddings_build(args: argparse.Namespace) -> int:
|
|
|
145
179
|
print(f"Unknown provider: {args.provider}", file=sys.stderr)
|
|
146
180
|
return 1
|
|
147
181
|
else:
|
|
148
|
-
provider = get_embedding_provider(model=args.model)
|
|
182
|
+
provider = get_embedding_provider(model=args.model, cpu_only=cpu_only)
|
|
149
183
|
|
|
150
184
|
if not provider:
|
|
151
185
|
print("No embedding provider available.", file=sys.stderr)
|
|
@@ -156,12 +190,25 @@ def cmd_embeddings_build(args: argparse.Namespace) -> int:
|
|
|
156
190
|
print(f"Provider {provider.provider_name} is not available.", file=sys.stderr)
|
|
157
191
|
return 1
|
|
158
192
|
|
|
159
|
-
|
|
193
|
+
# Show CPU-only status for Ollama
|
|
194
|
+
cpu_only_status = ""
|
|
195
|
+
if hasattr(provider, "cpu_only") and provider.cpu_only:
|
|
196
|
+
cpu_only_status = " (CPU-only)"
|
|
197
|
+
|
|
198
|
+
logger.info(
|
|
199
|
+
"CLI embeddings build provider=%s model=%s output=%s cpu_only=%s",
|
|
200
|
+
provider.provider_name,
|
|
201
|
+
provider.model,
|
|
202
|
+
args.output,
|
|
203
|
+
getattr(provider, "cpu_only", False),
|
|
204
|
+
)
|
|
205
|
+
print(f"Using provider: {provider.provider_name}{cpu_only_status}")
|
|
160
206
|
print(f"Model: {provider.model} ({provider.dimension}d)")
|
|
161
207
|
|
|
162
208
|
# Build embeddings
|
|
163
209
|
corpus = Corpus()
|
|
164
|
-
|
|
210
|
+
doc_total = corpus.document_count()
|
|
211
|
+
print(f"\nBuilding embeddings for {doc_total} documents...")
|
|
165
212
|
|
|
166
213
|
# Use the corpus's internal index
|
|
167
214
|
embedding_index = EmbeddingIndex(provider=provider)
|
|
@@ -218,6 +265,12 @@ def cmd_embeddings_build(args: argparse.Namespace) -> int:
|
|
|
218
265
|
output_path = Path(args.output)
|
|
219
266
|
embedding_index.save(output_path)
|
|
220
267
|
|
|
268
|
+
logger.info(
|
|
269
|
+
"CLI embeddings build completed docs=%s sections=%s output=%s",
|
|
270
|
+
doc_count,
|
|
271
|
+
section_count,
|
|
272
|
+
output_path,
|
|
273
|
+
)
|
|
221
274
|
print(f"\nDone! Embedded {section_count} sections from {doc_count} documents.")
|
|
222
275
|
print(f"Saved to: {output_path}")
|
|
223
276
|
|
|
@@ -265,6 +318,12 @@ def main() -> int:
|
|
|
265
318
|
)
|
|
266
319
|
emb_build.add_argument("--ollama-host", help="Ollama host URL")
|
|
267
320
|
emb_build.add_argument("--openai-key", help="OpenAI API key")
|
|
321
|
+
emb_build.add_argument(
|
|
322
|
+
"--cpu-only",
|
|
323
|
+
action="store_true",
|
|
324
|
+
help="Force CPU-only inference for Ollama (num_gpu=0). "
|
|
325
|
+
"Useful when GPU is under VRAM pressure.",
|
|
326
|
+
)
|
|
268
327
|
emb_build.set_defaults(func=cmd_embeddings_build)
|
|
269
328
|
|
|
270
329
|
args = parser.parse_args()
|
|
@@ -277,6 +336,7 @@ def main() -> int:
|
|
|
277
336
|
emb_parser.print_help()
|
|
278
337
|
return 0
|
|
279
338
|
|
|
339
|
+
logger.debug("CLI command executed: %s", args.command)
|
|
280
340
|
result: int = args.func(args)
|
|
281
341
|
return result
|
|
282
342
|
|
|
@@ -53,6 +53,26 @@ from typing import Any
|
|
|
53
53
|
from ifcraftcorpus.parser import Document, parse_directory
|
|
54
54
|
|
|
55
55
|
|
|
56
|
+
def _sanitize_fts_query(query: str) -> str:
|
|
57
|
+
"""Sanitize a query string for the FTS5 MATCH clause.
|
|
58
|
+
|
|
59
|
+
This function replaces hyphens with spaces to prevent FTS5 from
|
|
60
|
+
interpreting them as the `NOT` operator. This is intended to correctly
|
|
61
|
+
handle natural language queries with hyphenated words, for example
|
|
62
|
+
transforming "haunted-house" into a search for "haunted house".
|
|
63
|
+
|
|
64
|
+
It also collapses any resulting multiple spaces into a single space.
|
|
65
|
+
|
|
66
|
+
Args:
|
|
67
|
+
query: Raw query string from user input.
|
|
68
|
+
|
|
69
|
+
Returns:
|
|
70
|
+
Sanitized query safe for FTS5 MATCH.
|
|
71
|
+
"""
|
|
72
|
+
# Replace hyphens and collapse whitespace in one go.
|
|
73
|
+
return " ".join(query.replace("-", " ").split())
|
|
74
|
+
|
|
75
|
+
|
|
56
76
|
@dataclass
|
|
57
77
|
class SearchResult:
|
|
58
78
|
"""A search result from the corpus FTS5 index.
|
|
@@ -380,8 +400,8 @@ class CorpusIndex:
|
|
|
380
400
|
... cluster="emotional-design",
|
|
381
401
|
... limit=5)
|
|
382
402
|
"""
|
|
383
|
-
# Build FTS5 query
|
|
384
|
-
fts_query = query
|
|
403
|
+
# Build FTS5 query - sanitize to handle special characters
|
|
404
|
+
fts_query = _sanitize_fts_query(query)
|
|
385
405
|
|
|
386
406
|
# Add cluster filter if specified
|
|
387
407
|
where_clause = ""
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""Shared logging helpers for the IF Craft Corpus codebase."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
import sys
|
|
8
|
+
from typing import Final
|
|
9
|
+
|
|
10
|
+
LOG_LEVEL_ENV: Final[str] = "LOG_LEVEL"
|
|
11
|
+
VERBOSE_ENV: Final[str] = "VERBOSE"
|
|
12
|
+
|
|
13
|
+
__all__ = ["configure_logging", "LOG_LEVEL_ENV", "VERBOSE_ENV"]
|
|
14
|
+
|
|
15
|
+
_TRUTHY_VALUES: Final[set[str]] = {"1", "true", "yes", "on"}
|
|
16
|
+
_configured: bool = False
|
|
17
|
+
_CHATTY_LOGGERS: Final[tuple[str, ...]] = (
|
|
18
|
+
"httpx",
|
|
19
|
+
"fakeredis",
|
|
20
|
+
"docket",
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _is_truthy(value: str | None) -> bool:
|
|
25
|
+
"""Return True if the string resembles a truthy flag."""
|
|
26
|
+
|
|
27
|
+
if value is None:
|
|
28
|
+
return False
|
|
29
|
+
return value.strip().lower() in _TRUTHY_VALUES
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _resolve_level(value: str | None) -> int | None:
|
|
33
|
+
"""Convert a logging level string (name or integer) to ``int``."""
|
|
34
|
+
|
|
35
|
+
if not value:
|
|
36
|
+
return None
|
|
37
|
+
candidate = value.strip()
|
|
38
|
+
if not candidate:
|
|
39
|
+
return None
|
|
40
|
+
if candidate.isdigit():
|
|
41
|
+
return int(candidate)
|
|
42
|
+
name = candidate.upper()
|
|
43
|
+
return getattr(logging, name, None)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def configure_logging(
|
|
47
|
+
*,
|
|
48
|
+
env_level: str = LOG_LEVEL_ENV,
|
|
49
|
+
env_verbose: str = VERBOSE_ENV,
|
|
50
|
+
fmt: str = "%(asctime)s [%(levelname)s] %(name)s: %(message)s",
|
|
51
|
+
) -> int | None:
|
|
52
|
+
"""Configure root logging when LOG_LEVEL/VERBOSE are set.
|
|
53
|
+
|
|
54
|
+
Returns the configured level when logging is enabled, ``None`` otherwise.
|
|
55
|
+
"""
|
|
56
|
+
|
|
57
|
+
global _configured
|
|
58
|
+
|
|
59
|
+
raw_level = os.getenv(env_level)
|
|
60
|
+
level = _resolve_level(raw_level)
|
|
61
|
+
verbose_flag = os.getenv(env_verbose)
|
|
62
|
+
|
|
63
|
+
if raw_level and level is None:
|
|
64
|
+
print(
|
|
65
|
+
f"ifcraftcorpus: unknown log level '{raw_level}', defaulting to INFO",
|
|
66
|
+
file=sys.stderr,
|
|
67
|
+
)
|
|
68
|
+
level = logging.INFO
|
|
69
|
+
|
|
70
|
+
if level is None and not _is_truthy(verbose_flag):
|
|
71
|
+
return None
|
|
72
|
+
|
|
73
|
+
if level is None:
|
|
74
|
+
level = logging.DEBUG
|
|
75
|
+
|
|
76
|
+
root = logging.getLogger()
|
|
77
|
+
if not (root.handlers and _configured):
|
|
78
|
+
logging.basicConfig(level=level, format=fmt, stream=sys.stderr)
|
|
79
|
+
_configured = True
|
|
80
|
+
root.setLevel(level)
|
|
81
|
+
|
|
82
|
+
for name in _CHATTY_LOGGERS:
|
|
83
|
+
logging.getLogger(name).setLevel(max(logging.WARNING, level))
|
|
84
|
+
return level
|