claude-dev-env 1.50.0 → 1.50.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,233 @@
1
+ """Parse gh pr create/edit/comment commands into auditable body content.
2
+
3
+ Tokenizes the captured shell command and extracts the PR body that should be
4
+ audited, resolving body-file paths and rejecting unauditable shell variables,
5
+ stdin sentinels, and path-traversal targets. Positional PR-number extraction
6
+ lives in pr_description_pr_number.py.
7
+ """
8
+
9
+ import shlex
10
+ import sys
11
+ from pathlib import Path
12
+
13
+ _hooks_dir = str(Path(__file__).resolve().parent.parent)
14
+ if _hooks_dir not in sys.path:
15
+ sys.path.insert(0, _hooks_dir)
16
+
17
+ from blocking._gh_body_arg_utils import ( # noqa: E402
18
+ all_body_flags,
19
+ body_file_flag,
20
+ body_file_short_flag,
21
+ count_extra_tokens_to_skip_for_split_quoted_value,
22
+ get_logical_first_line,
23
+ is_flag_shaped_token,
24
+ is_unresolvable_shell_value,
25
+ iter_significant_tokens,
26
+ match_body_file_equals_prefix,
27
+ match_body_flag_equals_prefix,
28
+ match_non_body_value_flag_equals_prefix,
29
+ non_body_value_flags,
30
+ strip_surrounding_quotes,
31
+ )
32
+ from hooks_constants.pr_description_enforcer_constants import ( # noqa: E402
33
+ BODY_FILE_STDIN_SENTINEL,
34
+ )
35
+ from hooks_constants.setup_project_paths_constants import UTF8_ENCODING # noqa: E402
36
+
37
+
38
class PathTraversalError(Exception):
    """Raised when a body-file path resolves outside the allowed root."""
40
+
41
+
42
def _read_body_file_contents(file_path: str) -> str | None:
    """Read the body file at *file_path*, refusing paths that escape the working directory.

    The fully resolved current working directory is the allowed root. Two
    kinds of path are confined to it:

    * a path that is itself a symlink, and
    * any relative path.

    An absolute, non-symlink path is read without a containment check.

    Args:
        file_path: Path taken from a body-file flag value.

    Returns:
        The file's text, decoded with UTF8_ENCODING and with undecodable
        bytes replaced, or None when the path cannot be resolved or the file
        cannot be opened or read.

    Raises:
        PathTraversalError: A symlink or relative path resolves outside the
            allowed root.
    """
    given_path = Path(file_path)
    allowed_root = Path.cwd().resolve()
    try:
        is_symlink = given_path.is_symlink()
        # Resolve once; the same result serves both containment checks below.
        resolved_path = given_path.resolve()
    except (OSError, RuntimeError, ValueError):
        # Path.resolve() raises RuntimeError on a symlink loop before Python
        # 3.13, and an embedded NUL byte can surface as ValueError. Such a
        # path is unreadable, which this function reports as None.
        return None

    if is_symlink and not resolved_path.is_relative_to(allowed_root):
        raise PathTraversalError("symlink target resolves outside allowed root")
    if not given_path.is_absolute() and not resolved_path.is_relative_to(allowed_root):
        raise PathTraversalError("relative path resolves outside allowed root")

    try:
        with open(resolved_path, "r", encoding=UTF8_ENCODING, errors="replace") as body_file:
            return body_file.read()
    except OSError:
        # FileNotFoundError, IsADirectoryError and PermissionError are all
        # OSError subclasses, so one clause covers every unreadable-file case.
        return None
62
+
63
+
64
def _resolve_body_file_value(raw_value_token: str) -> str | None:
    """Load the file named by a body-file flag value, or None if unauditable.

    None means the body is present but cannot be audited, so enforcement is
    skipped. That is the outcome when the unquoted value is empty, equals the
    stdin sentinel, is an unresolvable shell value, or is rejected as a path
    traversal. Whatever the file reader returns is passed through otherwise.
    """
    body_path = strip_surrounding_quotes(raw_value_token)
    cannot_audit = (
        not body_path
        or body_path == BODY_FILE_STDIN_SENTINEL
        or is_unresolvable_shell_value(body_path)
    )
    if cannot_audit:
        return None
    try:
        file_text = _read_body_file_contents(body_path)
    except PathTraversalError:
        # A rejected path is treated like any other unauditable body.
        file_text = None
    return file_text
81
+
82
+
83
def _resolve_body_string_value(raw_value_token: str) -> str | None:
    """Unquote an inline body value; None signals an unresolvable shell variable.

    Keeping the two outcomes apart lets `pr_description_enforcer.main()` skip
    enforcement only for bodies it cannot audit. A literal `--body ""` still
    comes back as `""` and flows into `validate_pr_body`, where the
    substantive-prose check blocks it.
    """
    literal_body = strip_surrounding_quotes(raw_value_token)
    return None if is_unresolvable_shell_value(literal_body) else literal_body
95
+
96
+
97
def _reassemble_split_quoted_value(
    first_value_token: str, all_remaining_tokens: list[str]
) -> str | None:
    """Rejoin a value that tokenization split across several tokens.

    The helper reports how many tokens after *first_value_token* belong to the
    same value. Those tokens are joined back on with single spaces. A count of
    zero leaves the first token unchanged.

    Returns None when the helper reports None (presumably a quote that never
    closes -- confirm against the helper).
    """
    continuation_count = count_extra_tokens_to_skip_for_split_quoted_value(
        all_remaining_tokens,
        first_value_token,
    )
    if continuation_count is None:
        return None
    value_pieces = [first_value_token]
    if continuation_count != 0:
        value_pieces.extend(all_remaining_tokens[:continuation_count])
    return " ".join(value_pieces)
110
+
111
+
112
def _scan_raw_tokens_for_body(all_raw_tokens: list[str]) -> str | None | bool:
    """Walk the raw tokens left to right and resolve the first body flag found.

    Values of non-body value-taking flags are stepped over, so a body flag
    that appears inside such a value is never matched.

    Returns:
        False when no body or body-file flag is present (caller should
        continue).
        None when a body flag is present but unauditable: no value follows
        it, the split value cannot be reassembled, or the value is an
        unresolvable shell variable (e.g. `--body "$VAR"`). The caller skips
        enforcement.
        str for a resolved body. An empty string `""` is a literal-empty body
        (e.g. `--body ""`) and must still flow into `validate_pr_body` so the
        substantive-prose check blocks it.
    """

    def finish(first_piece, later_tokens, resolve):
        # Shared tail of every body branch: rejoin a split value, then resolve.
        joined_value = _reassemble_split_quoted_value(first_piece, later_tokens)
        return None if joined_value is None else resolve(joined_value)

    cursor = 0
    while cursor < len(all_raw_tokens):
        token = all_raw_tokens[cursor]
        tail = all_raw_tokens[cursor + 1 :]

        # `--flag=value` form of a non-body flag: skip it and any continuation.
        skipped_prefix = match_non_body_value_flag_equals_prefix(token)
        if skipped_prefix is not None:
            extra = count_extra_tokens_to_skip_for_split_quoted_value(
                tail, token[len(skipped_prefix) :]
            )
            cursor += 1 + (extra or 0)
            continue

        # `--flag value` form of a non-body flag: skip the flag, plus its
        # value when one actually follows.
        if token in non_body_value_flags:
            cursor += 1
            if tail and not is_flag_shaped_token(tail[0]):
                extra = count_extra_tokens_to_skip_for_split_quoted_value(tail[1:], tail[0])
                cursor += 1 + (extra or 0)
            continue

        # `--body=value` style: the value starts inside this token.
        string_prefix = match_body_flag_equals_prefix(token)
        if string_prefix is not None:
            return finish(token[len(string_prefix) :], tail, _resolve_body_string_value)

        file_prefix = match_body_file_equals_prefix(token)
        if file_prefix is not None:
            return finish(token[len(file_prefix) :], tail, _resolve_body_file_value)

        # Space-separated style: the value is the next token.
        if token in all_body_flags:
            resolve = _resolve_body_string_value
        elif token in {body_file_flag, body_file_short_flag}:
            resolve = _resolve_body_file_value
        else:
            cursor += 1
            continue
        if not tail or is_flag_shaped_token(tail[0]):
            # The flag has no value after it: malformed, so unauditable.
            return None
        return finish(tail[0], tail[1:], resolve)

    return False
178
+
179
+
180
def extract_body_from_command(
    command: str,
    all_pre_tokenized: tuple[str, list[str]] | None = None,
) -> str | None:
    """Return the PR body content to validate, or None when it cannot be extracted.

    A body flag counts only if it shows up among the tokens yielded by
    iter_significant_tokens, which skips the values of non-body value-taking
    flags. A --body/--body-file embedded in a quoted --title value therefore
    never false-matches. The value itself is then read from the raw token
    list, because iter_significant_tokens consumes the value token of
    space-form body-file flags.

    Args:
        command: The captured shell command.
        all_pre_tokenized: Optional (logical_line, raw_tokens) pair. When
            given, it is reused instead of recomputing the logical line and
            the shlex split.

    Returns:
        The body string, or None when the line is empty, tokenizing fails, no
        body flag is present, or the body is unauditable.
    """
    if all_pre_tokenized is None:
        logical_line = get_logical_first_line(command)
        if not logical_line:
            return None
        try:
            all_raw_tokens = shlex.split(logical_line, posix=False)
        except ValueError:
            return None
    else:
        logical_line, all_raw_tokens = all_pre_tokenized

    try:
        significant_pairs = list(
            iter_significant_tokens(command, pre_tokenized=(logical_line, all_raw_tokens))
        )
    except ValueError:
        return None
    distinct_tokens = {token for token, _ in significant_pairs}

    # Checked predicate by predicate, in the same order as the flag forms are
    # listed: space-form body, equals-form body, equals-form file, space-form file.
    body_flag_checks = (
        lambda token: token in all_body_flags,
        lambda token: match_body_flag_equals_prefix(token) is not None,
        lambda token: match_body_file_equals_prefix(token) is not None,
        lambda token: token in {body_file_flag, body_file_short_flag},
    )
    has_body_flag = any(
        check(token) for check in body_flag_checks for token in distinct_tokens
    )
    if not has_body_flag:
        return None

    scan_outcome = _scan_raw_tokens_for_body(all_raw_tokens)
    # A bool outcome is the scanner's "no body flag found" sentinel.
    return None if isinstance(scan_outcome, bool) else scan_outcome