biblicus 0.14.0__py3-none-any.whl → 0.15.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,200 @@
1
+ """
2
+ Shared span markup parsing utilities.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import re
8
+ from typing import Dict, List, Sequence
9
+
10
+ from pydantic import BaseModel, ConfigDict, Field
11
+
12
+
13
class TextAnnotatedSpan(BaseModel):
    """
    A tagged stretch of text together with the attributes read from its tag.

    :param index: Position of the span in output order, counted from one.
    :type index: int
    :param start_char: Character offset in the original text where the span starts.
    :type start_char: int
    :param end_char: Character offset in the original text where the span ends.
    :type end_char: int
    :param text: The text enclosed by the span.
    :type text: str
    :param attributes: Attribute names mapped to their values, as found on the span tag.
    :type attributes: dict[str, str]
    """

    # Unknown fields are rejected instead of being silently accepted.
    model_config = ConfigDict(extra="forbid")

    index: int = Field(ge=1)
    start_char: int = Field(ge=0)
    end_char: int = Field(ge=0)
    text: str
    # Each instance gets its own empty mapping when no attributes are given.
    attributes: Dict[str, str] = Field(default_factory=dict)
36
+
37
+
38
# Any span tag: an opening tag (attributes optional) or the bare closing tag.
_TAG_PATTERN = re.compile(r"<span\b[^>]*>|</span>")

# An opening tag only; group 1 is the raw text between "<span" and ">".
_OPEN_TAG_PATTERN = re.compile(r"<span\b([^>]*)>")

# One name="value" pair; group 1 is the name, group 2 the double-quoted value.
_ATTRIBUTE_PATTERN = re.compile(r'([A-Za-z_][A-Za-z0-9_-]*)="([^"]*)"')

# A "Span <number>" reference; group 1 is the number.
_SPAN_INDEX_PATTERN = re.compile(r"Span (\d+)")
42
+
43
+
44
def strip_span_tags(text: str) -> str:
    """
    Return the text with every opening and closing span tag deleted.

    Only the tags themselves are dropped; the text they enclose is kept.

    :param text: Text that may contain span tags.
    :type text: str
    :return: The same text without span tags.
    :rtype: str
    """
    any_span_tag = re.compile(r"</?span\b[^>]*>")
    return any_span_tag.sub("", text)
54
+
55
+
56
def parse_span_markup(marked_up_text: str) -> List[TextAnnotatedSpan]:
    """
    Convert span-tagged text into a list of annotated spans.

    Offsets are counted over the text with the tags left out, so each span's
    ``start_char`` and ``end_char`` describe where its text sits once the
    markup is removed.

    :param marked_up_text: Text containing span tags.
    :type marked_up_text: str
    :return: Spans in the order their tags appear, numbered from one.
    :rtype: list[TextAnnotatedSpan]
    :raises ValueError: If a span is nested, left unclosed, closed without
        having been opened, or has an invalid opening tag.
    """
    results: List[TextAnnotatedSpan] = []
    read_pos = 0  # position in the marked-up input just past the last tag seen
    plain_offset = 0  # count of non-tag characters consumed so far
    open_at = None  # plain-text offset where the currently open span began
    open_text = ""
    open_attributes: Dict[str, str] = {}

    for tag_match in _TAG_PATTERN.finditer(marked_up_text):
        # Account for the plain text sitting between the previous tag and this one.
        between = marked_up_text[read_pos : tag_match.start()]
        plain_offset += len(between)
        if open_at is not None:
            open_text += between
        read_pos = tag_match.end()

        tag = tag_match.group(0)
        if not tag.startswith("<span"):
            # Closing tag: it must finish a span that is currently open.
            if open_at is None:
                raise ValueError("Text markup contains an unmatched closing tag")
            results.append(
                TextAnnotatedSpan(
                    index=len(results) + 1,
                    start_char=open_at,
                    end_char=plain_offset,
                    text=open_text,
                    attributes=open_attributes,
                )
            )
            open_at, open_text, open_attributes = None, "", {}
            continue

        # Opening tag: only allowed when no other span is open.
        if open_at is not None:
            raise ValueError("Text markup contains nested spans")
        open_at = plain_offset
        open_text = ""
        open_attributes = _parse_span_attributes(tag)

    # Text after the last tag cannot close a span, so an open span here is an error.
    if open_at is not None:
        raise ValueError("Text markup contains an unclosed span")

    return results
114
+
115
+
116
def extract_span_indices(errors: Sequence[str]) -> List[int]:
    """
    Collect the span numbers mentioned in a batch of error messages.

    Each message contributes at most one number: the first ``Span <n>``
    reference found in it. Messages without such a reference are ignored.

    :param errors: Validation error messages.
    :type errors: Sequence[str]
    :return: The distinct span numbers found, in ascending order.
    :rtype: list[int]
    """
    first_hits = (_SPAN_INDEX_PATTERN.search(message) for message in errors)
    distinct = {int(hit.group(1)) for hit in first_hits if hit is not None}
    return sorted(distinct)
132
+
133
+
134
def summarize_span_context(marked_up_text: str, span_indices: Sequence[int]) -> List[str]:
    """
    Produce one readable line per requested span.

    Requested indices with no matching span are skipped, as are spans whose
    text is empty once whitespace is collapsed. Output follows the order of
    ``span_indices``.

    :param marked_up_text: Text containing span tags.
    :type marked_up_text: str
    :param span_indices: Span indices to summarize.
    :type span_indices: Sequence[int]
    :return: Lines of the form ``Span <index>: <text>``.
    :rtype: list[str]
    :raises ValueError: If the markup is invalid.
    """
    lookup = {}
    for parsed in parse_span_markup(marked_up_text):
        lookup[parsed.index] = parsed

    # Pair every known index with its text, with whitespace runs collapsed to single spaces.
    collapsed = (
        (wanted, " ".join(lookup[wanted].text.split()))
        for wanted in span_indices
        if wanted in lookup
    )
    return [f"Span {wanted}: {body}" for wanted, body in collapsed if body]
157
+
158
+
159
def build_span_context_section(marked_up_text: str, errors: Sequence[str]) -> str:
    """
    Render the spans named in error messages as a bulleted context block.

    An empty string is returned when the errors mention no span, when the
    markup cannot be parsed, or when none of the mentioned spans yields a
    summary.

    :param marked_up_text: Text containing span tags.
    :type marked_up_text: str
    :param errors: Validation error messages.
    :type errors: Sequence[str]
    :return: Formatted span context block or empty string.
    :rtype: str
    """
    referenced = extract_span_indices(errors)
    summaries: List[str] = []
    if referenced:
        try:
            summaries = summarize_span_context(marked_up_text, referenced)
        except ValueError:
            # Unparseable markup simply means no context can be offered.
            summaries = []

    if not summaries:
        return ""

    bullets = "\n".join(f"- {entry}" for entry in summaries)
    return f"Relevant spans:\n{bullets}\n\n"
181
+
182
+
183
def _parse_span_attributes(tag_text: str) -> Dict[str, str]:
    """
    Read the ``name="value"`` attributes from one opening span tag.

    :param tag_text: The complete opening tag, from ``<span`` through ``>``.
    :type tag_text: str
    :return: Attribute names mapped to values; empty when the tag has none.
    :rtype: dict[str, str]
    :raises ValueError: If the tag is not an opening span tag, repeats an
        attribute name, or contains text that is not a ``name="value"`` pair.
    """
    opening = _OPEN_TAG_PATTERN.fullmatch(tag_text)
    if opening is None:
        raise ValueError("Text markup contains an invalid span tag")

    # Backslash-escaped quotes are treated as plain quotes before matching.
    raw = opening.group(1).strip().replace('\\"', '"')

    parsed: Dict[str, str] = {}
    for pair in _ATTRIBUTE_PATTERN.finditer(raw):
        key, val = pair.groups()
        if key in parsed:
            raise ValueError("Text markup contains duplicate span attributes")
        parsed[key] = val

    # Anything left once the recognised pairs are removed is unsupported content.
    leftover = _ATTRIBUTE_PATTERN.sub("", raw).strip()
    if leftover:
        raise ValueError("Text markup contains unsupported span attributes")
    return parsed
@@ -0,0 +1,319 @@
1
+ """
2
+ Pydantic models for agentic text utilities.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import List, Optional
8
+
9
+ from pydantic import BaseModel, ConfigDict, Field, model_validator
10
+
11
+ from ..ai.models import LlmClientConfig
12
+ from .markup import TextAnnotatedSpan
13
+ from .prompts import (
14
+ DEFAULT_ANNOTATE_SYSTEM_PROMPT,
15
+ DEFAULT_EXTRACT_SYSTEM_PROMPT,
16
+ DEFAULT_LINK_SYSTEM_PROMPT,
17
+ DEFAULT_PROMPT_TEMPLATE,
18
+ DEFAULT_REDACT_SYSTEM_PROMPT,
19
+ DEFAULT_SLICE_SYSTEM_PROMPT,
20
+ )
21
+
22
+
23
class TextToolLoopRequest(BaseModel):
    """
    Base request for a tool-loop text operation driven by a language model.

    The two prompts follow complementary rules, checked once the fields have
    been validated: ``system_prompt`` must contain the ``{text}`` placeholder
    and ``prompt_template`` must not.

    :param text: Input text to process.
    :type text: str
    :param client: LLM client configuration.
    :type client: biblicus.ai.models.LlmClientConfig
    :param prompt_template: Prompt template describing what to return (must not include ``{text}``).
    :type prompt_template: str
    :param system_prompt: System prompt template containing ``{text}``. It has no default on
        this base class, so callers must supply it; the specific utility requests provide
        built-in defaults.
    :type system_prompt: str
    :param max_rounds: Maximum number of edit rounds.
    :type max_rounds: int
    :param max_edits_per_round: Maximum edits per round.
    :type max_edits_per_round: int
    :param mock_marked_up_text: Optional pre-rendered markup for deterministic tests.
    :type mock_marked_up_text: str or None
    """

    # Unknown fields are rejected instead of being silently accepted.
    model_config = ConfigDict(extra="forbid")

    text: str = Field(min_length=1)
    client: LlmClientConfig
    prompt_template: str = Field(default=DEFAULT_PROMPT_TEMPLATE, min_length=1)
    system_prompt: str = Field(min_length=1)
    max_rounds: int = Field(default=6, ge=1)
    max_edits_per_round: int = Field(default=500, ge=1)
    mock_marked_up_text: Optional[str] = Field(default=None, min_length=1)

    @model_validator(mode="after")
    def _validate_prompts(self) -> "TextToolLoopRequest":
        """
        Enforce where the ``{text}`` placeholder may appear.

        :return: The validated request itself.
        :rtype: TextToolLoopRequest
        :raises ValueError: If ``system_prompt`` lacks ``{text}`` or
            ``prompt_template`` contains it.
        """
        placeholder = "{text}"
        if placeholder not in self.system_prompt:
            raise ValueError("system_prompt must include {text}")
        if placeholder in self.prompt_template:
            raise ValueError("prompt_template must not include {text}")
        return self
62
+
63
+
64
class TextExtractRequest(TextToolLoopRequest):
    """
    Tool-loop request for the text extract utility.

    Differs from the base request only in supplying a default system prompt.

    :param text: Input text to annotate with XML span tags.
    :type text: str
    :param client: LLM client configuration.
    :type client: biblicus.ai.models.LlmClientConfig
    :param prompt_template: Prompt template describing what to return (must not include ``{text}``).
    :type prompt_template: str
    :param system_prompt: System prompt template containing ``{text}``. Falls back to
        ``DEFAULT_EXTRACT_SYSTEM_PROMPT`` when omitted.
    :type system_prompt: str
    :param max_rounds: Maximum number of edit rounds.
    :type max_rounds: int
    :param max_edits_per_round: Maximum edits per round.
    :type max_edits_per_round: int
    """

    system_prompt: str = Field(default=DEFAULT_EXTRACT_SYSTEM_PROMPT, min_length=1)
84
+
85
+
86
class TextSliceRequest(TextToolLoopRequest):
    """
    Tool-loop request for the text slice utility.

    Differs from the base request only in supplying a default system prompt.

    :param text: Input text to mark with slice markers.
    :type text: str
    :param client: LLM client configuration.
    :type client: biblicus.ai.models.LlmClientConfig
    :param prompt_template: Prompt template describing what to return (must not include ``{text}``).
    :type prompt_template: str
    :param system_prompt: System prompt template containing ``{text}``. Falls back to
        ``DEFAULT_SLICE_SYSTEM_PROMPT`` when omitted.
    :type system_prompt: str
    :param max_rounds: Maximum number of edit rounds.
    :type max_rounds: int
    :param max_edits_per_round: Maximum edits per round.
    :type max_edits_per_round: int
    """

    system_prompt: str = Field(default=DEFAULT_SLICE_SYSTEM_PROMPT, min_length=1)
106
+
107
+
108
class TextAnnotateRequest(TextToolLoopRequest):
    """
    Tool-loop request for annotating text through span attributes.

    Adds an optional list of permitted attribute names and supplies a default
    system prompt.

    :param text: Input text to annotate with XML span tags.
    :type text: str
    :param client: LLM client configuration.
    :type client: biblicus.ai.models.LlmClientConfig
    :param prompt_template: Prompt template describing what to return (must not include ``{text}``).
    :type prompt_template: str
    :param system_prompt: System prompt containing ``{text}``. Falls back to
        ``DEFAULT_ANNOTATE_SYSTEM_PROMPT`` when omitted.
    :type system_prompt: str
    :param allowed_attributes: Optional list of allowed span attribute names.
    :type allowed_attributes: list[str] or None
    :param max_rounds: Maximum number of edit rounds.
    :type max_rounds: int
    :param max_edits_per_round: Maximum edits per round.
    :type max_edits_per_round: int
    """

    system_prompt: str = Field(default=DEFAULT_ANNOTATE_SYSTEM_PROMPT, min_length=1)
    allowed_attributes: Optional[List[str]] = None
131
+
132
+
133
class TextRedactRequest(TextToolLoopRequest):
    """
    Tool-loop request for redacting text through span markers.

    Adds an optional list of redaction types and supplies a default system
    prompt.

    :param text: Input text to annotate with XML span tags.
    :type text: str
    :param client: LLM client configuration.
    :type client: biblicus.ai.models.LlmClientConfig
    :param prompt_template: Prompt template describing what to return (must not include ``{text}``).
    :type prompt_template: str
    :param system_prompt: System prompt containing ``{text}``. Falls back to
        ``DEFAULT_REDACT_SYSTEM_PROMPT`` when omitted.
    :type system_prompt: str
    :param redaction_types: Optional list of allowed redaction types. When omitted, no attributes are allowed.
    :type redaction_types: list[str] or None
    :param max_rounds: Maximum number of edit rounds.
    :type max_rounds: int
    :param max_edits_per_round: Maximum edits per round.
    :type max_edits_per_round: int
    """

    system_prompt: str = Field(default=DEFAULT_REDACT_SYSTEM_PROMPT, min_length=1)
    redaction_types: Optional[List[str]] = None
156
+
157
+
158
class TextLinkRequest(TextToolLoopRequest):
    """
    Tool-loop request for linking text through id/ref span attributes.

    Adds the prefix expected on id values and supplies a default system
    prompt.

    :param text: Input text to annotate with XML span tags.
    :type text: str
    :param client: LLM client configuration.
    :type client: biblicus.ai.models.LlmClientConfig
    :param prompt_template: Prompt template describing what to return (must not include ``{text}``).
    :type prompt_template: str
    :param system_prompt: System prompt containing ``{text}``. Falls back to
        ``DEFAULT_LINK_SYSTEM_PROMPT`` when omitted.
    :type system_prompt: str
    :param id_prefix: Prefix required for id attributes; ``"link_"`` unless overridden.
    :type id_prefix: str
    :param max_rounds: Maximum number of edit rounds.
    :type max_rounds: int
    :param max_edits_per_round: Maximum edits per round.
    :type max_edits_per_round: int
    """

    system_prompt: str = Field(default=DEFAULT_LINK_SYSTEM_PROMPT, min_length=1)
    id_prefix: str = Field(default="link_", min_length=1)
181
+
182
+
183
class TextExtractSpan(BaseModel):
    """
    One extracted stretch of text and its location.

    :param index: Position of the span in output order, counted from one.
    :type index: int
    :param start_char: Character offset in the original text where the span starts.
    :type start_char: int
    :param end_char: Character offset in the original text where the span ends.
    :type end_char: int
    :param text: The span's text.
    :type text: str
    """

    # Unknown fields are rejected instead of being silently accepted.
    model_config = ConfigDict(extra="forbid")

    index: int = Field(ge=1)
    start_char: int = Field(ge=0)
    end_char: int = Field(ge=0)
    text: str
203
+
204
+
205
class TextSliceSegment(BaseModel):
    """
    One slice of text and its location.

    :param index: Position of the slice in output order, counted from one.
    :type index: int
    :param start_char: Character offset in the original text where the slice starts.
    :type start_char: int
    :param end_char: Character offset in the original text where the slice ends.
    :type end_char: int
    :param text: The slice's text.
    :type text: str
    """

    # Unknown fields are rejected instead of being silently accepted.
    model_config = ConfigDict(extra="forbid")

    index: int = Field(ge=1)
    start_char: int = Field(ge=0)
    end_char: int = Field(ge=0)
    text: str
225
+
226
+
227
class TextExtractResult(BaseModel):
    """
    Everything produced by a text extract run.

    :param marked_up_text: Original text with XML span tags inserted.
    :type marked_up_text: str
    :param spans: Extracted spans in document order; empty by default.
    :type spans: list[TextExtractSpan]
    :param warnings: Warning messages for the caller; empty by default.
    :type warnings: list[str]
    """

    # Unknown fields are rejected instead of being silently accepted.
    model_config = ConfigDict(extra="forbid")

    marked_up_text: str
    spans: List[TextExtractSpan] = Field(default_factory=list)
    warnings: List[str] = Field(default_factory=list)
244
+
245
+
246
class TextSliceResult(BaseModel):
    """
    Everything produced by a text slice run.

    :param marked_up_text: Original text with slice markers inserted.
    :type marked_up_text: str
    :param slices: Extracted slices in document order; empty by default.
    :type slices: list[TextSliceSegment]
    :param warnings: Warning messages for the caller; empty by default.
    :type warnings: list[str]
    """

    # Unknown fields are rejected instead of being silently accepted.
    model_config = ConfigDict(extra="forbid")

    marked_up_text: str
    slices: List[TextSliceSegment] = Field(default_factory=list)
    warnings: List[str] = Field(default_factory=list)
263
+
264
+
265
class TextAnnotateResult(BaseModel):
    """
    Everything produced by a text annotation run.

    :param marked_up_text: Original text with XML span tags inserted.
    :type marked_up_text: str
    :param spans: Annotated spans in document order; empty by default.
    :type spans: list[TextAnnotatedSpan]
    :param warnings: Warning messages for the caller; empty by default.
    :type warnings: list[str]
    """

    # Unknown fields are rejected instead of being silently accepted.
    model_config = ConfigDict(extra="forbid")

    marked_up_text: str
    spans: List[TextAnnotatedSpan] = Field(default_factory=list)
    warnings: List[str] = Field(default_factory=list)
282
+
283
+
284
class TextRedactResult(BaseModel):
    """
    Everything produced by a text redaction run.

    :param marked_up_text: Original text with XML span tags inserted.
    :type marked_up_text: str
    :param spans: Redacted spans in document order; empty by default.
    :type spans: list[TextAnnotatedSpan]
    :param warnings: Warning messages for the caller; empty by default.
    :type warnings: list[str]
    """

    # Unknown fields are rejected instead of being silently accepted.
    model_config = ConfigDict(extra="forbid")

    marked_up_text: str
    spans: List[TextAnnotatedSpan] = Field(default_factory=list)
    warnings: List[str] = Field(default_factory=list)
301
+
302
+
303
class TextLinkResult(BaseModel):
    """
    Everything produced by a text linking run.

    :param marked_up_text: Original text with XML span tags inserted.
    :type marked_up_text: str
    :param spans: Linked spans in document order; empty by default.
    :type spans: list[TextAnnotatedSpan]
    :param warnings: Warning messages for the caller; empty by default.
    :type warnings: list[str]
    """

    # Unknown fields are rejected instead of being silently accepted.
    model_config = ConfigDict(extra="forbid")

    marked_up_text: str
    spans: List[TextAnnotatedSpan] = Field(default_factory=list)
    warnings: List[str] = Field(default_factory=list)
@@ -0,0 +1,113 @@
1
+ """
2
+ Default prompts for Biblicus text utilities.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
# Prompt template used when a request does not provide one; it deliberately
# contains no {text} placeholder.
DEFAULT_PROMPT_TEMPLATE = "Return the requested text."
8
+
9
# System prompt for text extraction: the model wraps the requested text in
# plain <span>...</span> tags. {text} marks where the current text is inserted.
DEFAULT_EXTRACT_SYSTEM_PROMPT = "".join(
    (
        # Role and task framing.
        "You are a virtual file editor. Use the available tools to edit the text.\n",
        "Interpret the word 'return' in the user's request as: wrap the returned text with ",
        "<span>...</span> in-place in the current text.\n\n",
        # Tool usage.
        "Use the str_replace tool to insert <span>...</span> tags and the done tool when finished.\n",
        "When finished, call done. Do NOT return JSON in the assistant message.\n\n",
        # Editing rules.
        "Rules:\n",
        "- Use str_replace only.\n",
        "- old_str must match exactly once in the current text.\n",
        "- When choosing old_str, copy the exact substring (including punctuation/case) from the current text.\n",
        "- old_str and new_str must be non-empty strings.\n",
        "- new_str must be identical to old_str with only <span> and </span> inserted.\n",
        "- Do not include <span> or </span> inside old_str or new_str.\n",
        "- Do not insert nested spans.\n",
        "- If a tool call fails due to non-unique old_str, retry with a longer unique old_str.\n",
        "- If a tool call fails, read the error and keep editing. Do not call done until spans are inserted.\n",
        "- Do not delete, reorder, paraphrase, or label text.\n\n",
        # Placeholder section for the text being edited.
        "Current text:\n---\n{text}\n---\n",
    )
)
28
+
29
# System prompt for text slicing: the model inserts <slice/> markers into the
# text. {text} marks where the current text is inserted.
DEFAULT_SLICE_SYSTEM_PROMPT = "".join(
    (
        # Role and task framing.
        "You are a virtual file editor. Use the available tools to edit the text.\n",
        "Interpret the word 'return' in the user's request as: insert <slice/> markers in-place in the current text.\n\n",
        # Tool usage.
        "Use the str_replace tool to insert <slice/> markers and the done tool when finished.\n",
        "When finished, call done. Do NOT return JSON in the assistant message.\n\n",
        # Editing rules.
        "Rules:\n",
        "- Use str_replace only.\n",
        "- old_str must match exactly once in the current text.\n",
        "- old_str and new_str must be non-empty strings.\n",
        "- new_str must be identical to old_str with only <slice/> inserted.\n",
        "- Do not include <slice/> inside old_str or new_str.\n",
        "- If a tool call fails due to non-unique old_str, retry with a longer unique old_str.\n",
        "- If a tool call fails, read the error and keep editing. Do not call done until markers are inserted.\n",
        "- Do not delete, reorder, or paraphrase text.\n\n",
        # Placeholder section for the text being edited.
        "Current text:\n---\n{text}\n---\n",
    )
)
45
+
46
# System prompt for text annotation: the model wraps text in spans that carry
# one attribute. Contains the {{ allowed_attributes }} and {text} placeholders.
DEFAULT_ANNOTATE_SYSTEM_PROMPT = "".join(
    (
        # Role and task framing.
        "You are a virtual file editor. Use the available tools to edit the text.\n",
        "Interpret the user's request as: wrap the requested text with ",
        '<span ATTRIBUTE="VALUE">...</span> in-place in the current text.\n',
        "Each span must include exactly one attribute from: {{ allowed_attributes }}.\n\n",
        # Tool usage.
        "Use the str_replace tool to insert span tags and the done tool when finished.\n",
        "When finished, call done. Do NOT return JSON in the assistant message.\n\n",
        # Editing rules.
        "Rules:\n",
        "- Use str_replace only.\n",
        "- old_str must match exactly once in the current text.\n",
        "- old_str and new_str must be non-empty strings.\n",
        "- new_str must be identical to old_str with only <span ...> and </span> inserted.\n",
        "- Do not include <span or </span> inside old_str or new_str.\n",
        "- Do not insert nested spans.\n",
        "- If a tool call fails due to non-unique old_str, retry with a longer unique old_str.\n",
        "- If a tool call fails, read the error and keep editing. Do not call done until spans are inserted.\n",
        "- Do not delete, reorder, paraphrase, or label text beyond the span attributes.\n\n",
        # Placeholder section for the text being edited.
        "Current text:\n---\n{text}\n---\n",
    )
)
65
+
66
# System prompt for text linking: first mentions get an id attribute, repeats
# get a ref attribute. Contains the {{ id_prefix }} and {text} placeholders.
DEFAULT_LINK_SYSTEM_PROMPT = "".join(
    (
        # Role and task framing.
        "You are a virtual file editor. Use the available tools to edit the text.\n",
        "Interpret the word 'return' in the user's request as: wrap the returned text with ",
        '<span ATTRIBUTE="VALUE">...</span> in-place in the current text.\n',
        "Each span must include exactly one attribute: id for first mentions and ref for repeats.\n",
        "Id values must start with '{{ id_prefix }}'.\n\n",
        # Rules specific to linking.
        "Linking rules:\n",
        "- For each distinct name or entity, assign exactly one id on its first occurrence.\n",
        "- Use ref on every subsequent occurrence of the same name or entity.\n",
        "- Wrap only the repeated name or entity text itself (no extra surrounding words).\n",
        "- Reuse the same id/ref value for identical names; do not create multiple ids for the same name.\n",
        "- Use ids in order of first appearance ({{ id_prefix }}1, {{ id_prefix }}2, ...).\n",
        "- Do not call done until every repeated name or entity in the text is wrapped.\n",
        "- If a name appears multiple times, there must be one id and refs for every later occurrence.\n\n",
        # Tool usage.
        "Use the str_replace tool to insert span tags and the done tool when finished.\n",
        "When finished, call done. Do NOT return JSON in the assistant message.\n\n",
        # General editing rules.
        "Rules:\n",
        "- Use str_replace only.\n",
        "- old_str must match exactly once in the current text.\n",
        "- old_str and new_str must be non-empty strings.\n",
        "- new_str must be identical to old_str with only <span ...> and </span> inserted.\n",
        "- Do not include <span or </span> inside old_str or new_str.\n",
        "- Do not insert nested spans.\n",
        "- If a tool call fails due to non-unique old_str, retry with a longer unique old_str.\n",
        "- If a tool call fails, read the error and keep editing. Do not call done until spans are inserted.\n",
        "- Do not delete, reorder, or paraphrase text.\n\n",
        # Placeholder section for the text being edited.
        "Current text:\n---\n{text}\n---\n",
    )
)
94
+
95
# System prompt for text redaction: the model wraps text in spans, optionally
# with a redact attribute. Contains the {{ redaction_types }} and {text} placeholders.
DEFAULT_REDACT_SYSTEM_PROMPT = "".join(
    (
        # Role and task framing.
        "You are a virtual file editor. Use the available tools to edit the text.\n",
        "Interpret the word 'return' in the user's request as: wrap the returned text with ",
        "<span>...</span> in-place in the current text.\n",
        "If redaction types are provided, use a redact attribute with one of: {{ redaction_types }}.\n\n",
        # Tool usage.
        "Use the str_replace tool to insert span tags and the done tool when finished.\n",
        "When finished, call done. Do NOT return JSON in the assistant message.\n\n",
        # Editing rules.
        "Rules:\n",
        "- Use str_replace only.\n",
        "- old_str must match exactly once in the current text.\n",
        "- old_str and new_str must be non-empty strings.\n",
        "- new_str must be identical to old_str with only <span ...> and </span> inserted.\n",
        "- Do not include <span or </span> inside old_str or new_str.\n",
        "- Do not insert nested spans.\n",
        "- If a tool call fails due to non-unique old_str, retry with a longer unique old_str.\n",
        "- If a tool call fails, read the error and keep editing. Do not call done until spans are inserted.\n",
        "- Do not delete, reorder, or paraphrase text.\n\n",
        # Placeholder section for the text being edited.
        "Current text:\n---\n{text}\n---\n",
    )
)