nighthawk-python 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,344 @@
1
+ from __future__ import annotations
2
+
3
+ import functools
4
+ import inspect
5
+ import logging
6
+ import re
7
+ from collections.abc import Iterable
8
+ from string import Template
9
+ from typing import Any, TypeAliasType
10
+
11
+ import tiktoken
12
+
13
+ from ..configuration import StepContextLimits, StepExecutorConfiguration
14
+ from ..json_renderer import JsonRendererStyle, count_tokens, render_json_text
15
+ from .scoping import (
16
+ RUN_ID,
17
+ SCOPE_ID,
18
+ STEP_ID,
19
+ get_execution_context,
20
+ get_user_prompt_suffix_fragments,
21
+ )
22
+ from .step_context import _MISSING, StepContext, resolve_name_in_step_context
23
+
24
+
25
def _resolve_partial_effective_signature(partial_callable: functools.partial[Any]) -> str | None:
    """Return the signature text of a ``functools.partial`` object.

    Args:
        partial_callable: The partial object to introspect.

    Returns:
        ``str(inspect.signature(partial_callable))``, or ``None`` when
        ``inspect.signature`` raises ``TypeError`` or ``ValueError``.
    """
    try:
        effective_signature = inspect.signature(partial_callable)
    except (TypeError, ValueError):
        # Introspection failed; the caller treats None as "signature unavailable".
        return None
    else:
        return str(effective_signature)
31
+
32
+
33
def _resolve_callable_signature_text(value: object) -> str | None:
    """Return the textual call signature of ``value``, if it can be introspected.

    ``functools.partial`` objects and every other kind of value go through the
    same ``inspect.signature`` call with the same failure handling, so a single
    code path covers both.

    Args:
        value: Any object; typically a callable.

    Returns:
        The signature rendered with ``str()``, or ``None`` when
        ``inspect.signature`` raises ``TypeError`` or ``ValueError``.
    """
    try:
        introspected_signature = inspect.signature(value)  # type: ignore[arg-type]
    except (TypeError, ValueError):
        return None
    else:
        return str(introspected_signature)
42
+
43
+
44
def _normalize_usage_intent_text(text: str, *, max_length: int = 72) -> str:
    """Collapse whitespace in ``text`` and truncate it to ``max_length`` characters.

    Args:
        text: Raw text; any run of whitespace is replaced by a single space and
            leading/trailing whitespace is removed.
        max_length: Maximum length of the returned string. Values below zero
            are treated as zero.

    Returns:
        The normalized text when it fits. Otherwise a prefix followed by
        ``"..."``. When ``max_length`` is too small to hold any text plus the
        ellipsis, a (possibly shortened) ellipsis is returned so the result
        never exceeds ``max_length``.
    """
    ellipsis_text = "..."
    normalized_text = " ".join(text.split())
    if len(normalized_text) <= max_length:
        return normalized_text
    if max_length <= len(ellipsis_text):
        # A negative slice bound would count from the end of the string and
        # produce output longer than the budget, so clamp explicitly.
        return ellipsis_text[: max(max_length, 0)]
    return normalized_text[: max_length - len(ellipsis_text)].rstrip() + ellipsis_text
49
+
50
+
51
def _is_meaningful_usage_intent_hint(usage_intent_hint: str) -> bool:
    """Tell whether a docstring-derived hint is more than known boilerplate.

    The hint is compared case-insensitively, ignoring surrounding whitespace,
    against a fixed list of generic docstring lines.

    Args:
        usage_intent_hint: Candidate hint text.

    Returns:
        ``False`` when the hint matches one of the boilerplate lines, ``True``
        otherwise (including for an empty string).
    """
    boilerplate_hints = (
        "call self as a function.",
        "create a new function with partial application of the given arguments and keywords.",
        "create a new function with partial application of the given arguments",
    )
    candidate = usage_intent_hint.strip().lower()
    return all(candidate != boilerplate for boilerplate in boilerplate_hints)
59
+
60
+
61
def _first_meaningful_docstring_line(documentation_text: str | None) -> str | None:
    """Return the normalized first line of a docstring, or None if absent or boilerplate."""
    if not documentation_text:
        return None
    first_line = documentation_text.splitlines()[0].strip()
    if not first_line:
        return None
    normalized_usage_intent_hint = _normalize_usage_intent_text(first_line)
    if _is_meaningful_usage_intent_hint(normalized_usage_intent_hint):
        return normalized_usage_intent_hint
    return None


def _resolve_callable_usage_intent_hint(*, value: object) -> str | None:
    """Derive a short "intent" hint for a callable from its documentation.

    The preferred docstring source depends on the kind of value:

    * ``functools.partial``: the docstring of the wrapped ``func``.
    * a non-routine value (per ``inspect.isroutine``): the docstring of its
      ``__call__`` attribute, when it has one.

    If the preferred source yields no meaningful first line (missing, blank,
    or generic boilerplate), the docstring of ``value`` itself is consulted.
    A boilerplate ``__call__`` docstring therefore no longer hides a useful
    docstring on the object.

    Args:
        value: The callable to describe.

    Returns:
        The whitespace-normalized, length-limited first docstring line, or
        ``None`` when no meaningful line is available.
    """
    preferred_documentation_text: str | None = None

    if isinstance(value, functools.partial):
        preferred_documentation_text = inspect.getdoc(value.func)
    elif not inspect.isroutine(value):
        call_attribute = getattr(value, "__call__", None)  # noqa: B004
        if call_attribute is not None:
            preferred_documentation_text = inspect.getdoc(call_attribute)

    usage_intent_hint = _first_meaningful_docstring_line(preferred_documentation_text)
    if usage_intent_hint is not None:
        return usage_intent_hint

    # The object's own docstring is only fetched when the preferred source
    # produced nothing usable.
    return _first_meaningful_docstring_line(inspect.getdoc(value))
82
+
83
+
84
def _is_async_callable_value(value: object) -> bool:
    """Report whether calling ``value`` is expected to produce a coroutine.

    ``functools.partial`` wrappers are peeled off first, so the check applies
    to the innermost wrapped callable. The result is ``True`` when that target
    is a coroutine function, or when it is not a routine and its ``__call__``
    attribute is a coroutine function.

    Args:
        value: Any object.

    Returns:
        ``True`` for async callables as described above, else ``False``.
    """
    target = value
    while isinstance(target, functools.partial):
        target = target.func

    if inspect.iscoroutinefunction(target):
        return True
    if inspect.isroutine(target):
        # A plain (non-coroutine) function, method or builtin.
        return False

    call_attribute = getattr(target, "__call__", None)  # noqa: B004
    return call_attribute is not None and inspect.iscoroutinefunction(call_attribute)
97
+
98
+
99
def _find_ambiguous_callable_signatures(
    *,
    reference_and_value_list: list[tuple[str, object]],
) -> dict[str, list[str]]:
    """Group callable references that share the same signature text.

    Args:
        reference_and_value_list: ``(reference, value)`` pairs to inspect.
            Non-callable values and callables whose signature cannot be
            resolved are ignored.

    Returns:
        A mapping from signature text to the references (in input order) that
        render with that signature, restricted to signatures used by more than
        one reference.
    """
    references_by_signature: dict[str, list[str]] = {}
    for reference, value in reference_and_value_list:
        signature_text = _resolve_callable_signature_text(value) if callable(value) else None
        if signature_text is not None:
            references_by_signature.setdefault(signature_text, []).append(reference)

    # Only signatures shared by at least two references are ambiguous.
    return {
        signature_text: sharing_references
        for signature_text, sharing_references in references_by_signature.items()
        if len(sharing_references) >= 2
    }
119
+
120
+
121
def _render_callable_line(
    *,
    reference: str,
    value: object,
    callable_signature_text_to_reference_list: dict[str, list[str]],
) -> str:
    """Render one prompt line describing a callable.

    The line is ``"<reference>: <signature>"`` followed by an optional trailing
    ``#`` comment listing, in order: the intent hint, an ``async`` marker, and
    a disambiguation note when the signature is shared with other references.
    When no signature can be resolved, a placeholder replaces the signature and
    only the intent hint (if any) is appended.

    Args:
        reference: Name under which the callable is exposed.
        value: The callable itself.
        callable_signature_text_to_reference_list: Signatures known to be
            shared by several references; only key membership is consulted.

    Returns:
        The rendered single-line description.
    """
    # NOTE(review): separator spacing in the literals below is reproduced as
    # it appears in the reviewed diff view - confirm against the real file.
    intent_hint = _resolve_callable_usage_intent_hint(value=value)
    signature_text = _resolve_callable_signature_text(value)

    if signature_text is None:
        placeholder_line = f"{reference}: <callable; signature-unavailable>"
        if intent_hint is None:
            return placeholder_line
        return placeholder_line + f" # intent: {intent_hint}"

    notes: list[str] = []
    if intent_hint is not None:
        notes.append(f"intent: {intent_hint}")
    if _is_async_callable_value(value):
        notes.append("async")
    if signature_text in callable_signature_text_to_reference_list:
        notes.append(f"disambiguation: use {reference}")

    signature_line = f"{reference}: {signature_text}"
    if not notes:
        return signature_line
    return signature_line + f" # {'; '.join(notes)}"
147
+
148
+
149
def _render_reference_and_value_list_section(
    *,
    section_name: str,
    step_context: StepContext,
    reference_and_value_list: list[tuple[str, object]],
    max_items: int,
    section_max_tokens: int,
    value_max_tokens: int,
    token_encoding: tiktoken.Encoding,
    json_renderer_style: JsonRendererStyle,
) -> str:
    """Render ``(reference, value)`` pairs as prompt lines within item and token budgets.

    Each pair becomes one line: type aliases as ``name: type = ...``, callables
    via ``_render_callable_line``, and everything else as
    ``name: TypeName = <rendered value>`` using ``render_json_text``. Rendering
    stops once ``max_items`` lines were emitted or the next line would push the
    running token total above ``section_max_tokens``. If any pair was left out,
    a ``<snipped>`` marker line is appended. Hitting the token budget is also
    logged at INFO level on the ``nighthawk`` logger.

    Args:
        section_name: Section label recorded in the truncation log entry.
        step_context: Provides ``step_id`` for the truncation log entry.
        reference_and_value_list: Pairs to render, in output order.
        max_items: Maximum number of rendered pairs.
        section_max_tokens: Token budget for the whole section.
        value_max_tokens: Token budget passed to ``render_json_text`` per value.
        token_encoding: Encoding used for token counting.
        json_renderer_style: Style passed to ``render_json_text``.

    Returns:
        The rendered lines joined with newlines.
    """
    ambiguous_signatures = _find_ambiguous_callable_signatures(reference_and_value_list=reference_and_value_list)
    rendered_lines: list[str] = []
    consumed_tokens = 0
    hit_token_limit = False

    for reference, value in reference_and_value_list:
        if len(rendered_lines) >= max_items:
            break

        if isinstance(value, TypeAliasType):
            line = f"{reference}: type = {value.__value__}"
            line_tokens = count_tokens(line, token_encoding)
        elif callable(value):
            line = _render_callable_line(
                reference=reference,
                value=value,
                callable_signature_text_to_reference_list=ambiguous_signatures,
            )
            line_tokens = count_tokens(line, token_encoding)
        else:
            name_and_type_prefix = f"{reference}: {type(value).__name__} = "
            value_text, value_tokens = render_json_text(
                value,
                max_tokens=value_max_tokens,
                encoding=token_encoding,
                style=json_renderer_style,
            )
            line = name_and_type_prefix + value_text
            line_tokens = count_tokens(name_and_type_prefix, token_encoding) + value_tokens

        # One extra token is charged per line (presumably for the joining newline).
        line_cost = line_tokens + 1
        if consumed_tokens + line_cost > section_max_tokens:
            hit_token_limit = True
            break

        rendered_lines.append(line)
        consumed_tokens += line_cost

    shown_items = len(rendered_lines)
    if shown_items < len(reference_and_value_list):
        rendered_lines.append("<snipped>")

    if hit_token_limit:
        log_attributes: dict[str, Any] = {
            STEP_ID: step_context.step_id,
            "nighthawk.prompt_context.section": section_name,
            "nighthawk.prompt_context.reason": "token_limit",
            "nighthawk.prompt_context.rendered_items": shown_items,
            "nighthawk.prompt_context.total_items": len(reference_and_value_list),
            "nighthawk.prompt_context.max_tokens": section_max_tokens,
        }
        try:
            execution_context = get_execution_context()
            log_attributes[RUN_ID] = execution_context.run_id
            log_attributes[SCOPE_ID] = execution_context.scope_id
        except Exception:
            # Best effort: run/scope identifiers are simply omitted from the
            # log entry when the execution context cannot be obtained.
            pass
        logging.getLogger("nighthawk").info("prompt_context_truncated %s", log_attributes)

    return "\n".join(rendered_lines)
223
+
224
+
225
def _render_locals_section(
    *,
    step_context: StepContext,
    references: Iterable[str],
    token_encoding: tiktoken.Encoding,
    context_limits: StepContextLimits,
    json_renderer_style: JsonRendererStyle,
) -> str:
    """Render the "locals" prompt section.

    Names are processed in sorted order; names starting with a double
    underscore are skipped. Each remaining name is looked up in
    ``step_context.step_locals`` (a missing name raises ``KeyError``).

    Args:
        step_context: Source of the local values.
        references: Names of the locals to render.
        token_encoding: Encoding used for token counting.
        context_limits: Supplies the locals item/token limits and the
            per-value token limit.
        json_renderer_style: Style used when rendering plain values.

    Returns:
        The rendered section text.
    """
    visible_locals: list[tuple[str, object]] = [
        (name, step_context.step_locals[name])
        for name in sorted(references)
        if not name.startswith("__")
    ]

    return _render_reference_and_value_list_section(
        section_name="locals",
        step_context=step_context,
        reference_and_value_list=visible_locals,
        max_items=context_limits.locals_max_items,
        section_max_tokens=context_limits.locals_max_tokens,
        value_max_tokens=context_limits.value_max_tokens,
        token_encoding=token_encoding,
        json_renderer_style=json_renderer_style,
    )
250
+
251
+
252
def extract_references_and_program(text: str) -> tuple[tuple[str, ...], str]:
    """Extract ``<name>`` references from a program text and unescape ``\\<name>``.

    A reference token is ``<path>`` where ``path`` is a dotted identifier path
    (for example ``<user.profile.name>``) not immediately preceded by a
    backslash. Only the root name (the part before the first dot) is reported.
    Tokens written as ``\\<path>`` are escapes: they are not reported and the
    backslash is removed in the returned text.

    Args:
        text: The program text to scan.

    Returns:
        A pair ``(references, program_text)``. ``references`` holds the
        distinct root names in order of first appearance, so the result is
        deterministic across runs. ``program_text`` is ``text`` with escaped
        tokens unescaped; unescaped tokens are left in place.
    """
    reference_path_pattern = r"[A-Za-z_][A-Za-z0-9_]*(?:\.[A-Za-z_][A-Za-z0-9_]*)*"

    unescaped_token_pattern = re.compile(r"(?<!\\)<(" + reference_path_pattern + r")>")
    escaped_token_pattern = re.compile(r"\\<(" + reference_path_pattern + r")>")

    # dict.fromkeys de-duplicates while keeping insertion order; a plain set
    # would make the order of the returned tuple depend on string hashing.
    references = dict.fromkeys(
        match.group(1).split(".", 1)[0] for match in unescaped_token_pattern.finditer(text)
    )

    def unescape(match: re.Match[str]) -> str:
        return f"<{match.group(1)}>"

    unescaped_text = escaped_token_pattern.sub(unescape, text)
    return tuple(references), unescaped_text
268
+
269
+
270
def _render_globals_section(
    *,
    step_context: StepContext,
    references: Iterable[str],
    token_encoding: tiktoken.Encoding,
    context_limits: StepContextLimits,
    json_renderer_style: JsonRendererStyle,
) -> str:
    """Render the "globals" prompt section for the referenced names.

    Names are processed in sorted order. A name is skipped when it starts with
    a double underscore, when it is already present in
    ``step_context.step_locals``, or when ``resolve_name_in_step_context``
    returns the ``_MISSING`` sentinel for it.

    Args:
        step_context: Context used to resolve the names.
        references: Candidate names to render.
        token_encoding: Encoding used for token counting.
        context_limits: Supplies the globals item/token limits and the
            per-value token limit.
        json_renderer_style: Style used when rendering plain values.

    Returns:
        The rendered section text.
    """
    resolved_globals: list[tuple[str, object]] = []
    for name in sorted(references):
        if name.startswith("__") or name in step_context.step_locals:
            continue
        resolved_value = resolve_name_in_step_context(step_context, name)
        if resolved_value is not _MISSING:
            resolved_globals.append((name, resolved_value))

    return _render_reference_and_value_list_section(
        section_name="globals",
        step_context=step_context,
        reference_and_value_list=resolved_globals,
        max_items=context_limits.globals_max_items,
        section_max_tokens=context_limits.globals_max_tokens,
        value_max_tokens=context_limits.value_max_tokens,
        token_encoding=token_encoding,
        json_renderer_style=json_renderer_style,
    )
300
+
301
+
302
def build_user_prompt(
    *,
    processed_natural_program: str,
    step_context: StepContext,
    configuration: StepExecutorConfiguration,
) -> str:
    """Assemble the user prompt for a step.

    The configured template (a ``string.Template``) is filled with three
    values: ``program`` (the program text with escaped reference tokens
    unescaped), ``locals`` (all step locals) and ``globals`` (names referenced
    by the program). Suffix fragments from the configuration and then from
    ``get_user_prompt_suffix_fragments()`` are appended, separated by blank
    lines.

    Args:
        processed_natural_program: Program text, possibly containing
            ``<name>`` reference tokens.
        step_context: Context providing local values and name resolution.
        configuration: Supplies the template, context limits, token encoding,
            JSON renderer style and configured suffix fragments.

    Returns:
        The complete prompt text.

    Raises:
        KeyError: From ``Template.substitute`` if the template uses a
            placeholder other than ``program``, ``locals`` or ``globals``.
        ValueError: From ``Template.substitute`` if the template contains an
            invalid placeholder.
    """
    template_text = configuration.prompts.step_user_prompt_template
    limits = configuration.context_limits
    encoding = configuration.resolve_token_encoding()

    rendered_locals = _render_locals_section(
        step_context=step_context,
        references=step_context.step_locals.keys(),
        token_encoding=encoding,
        context_limits=limits,
        json_renderer_style=configuration.json_renderer_style,
    )

    referenced_names, program_text = extract_references_and_program(processed_natural_program)
    rendered_globals = _render_globals_section(
        step_context=step_context,
        references=referenced_names,
        token_encoding=encoding,
        context_limits=limits,
        json_renderer_style=configuration.json_renderer_style,
    )

    prompt_text = Template(template_text).substitute(
        program=program_text,
        locals=rendered_locals,
        globals=rendered_globals,
    )

    # Configured fragments come first, then the ones from the scoping helper.
    suffix_fragments = (
        *configuration.user_prompt_suffix_fragments,
        *get_user_prompt_suffix_fragments(),
    )
    if not suffix_fragments:
        return prompt_text
    return "\n\n".join((prompt_text, *suffix_fragments))