schemathesis 4.0.0a2__py3-none-any.whl → 4.0.0a4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. schemathesis/cli/__init__.py +15 -4
  2. schemathesis/cli/commands/run/__init__.py +148 -94
  3. schemathesis/cli/commands/run/context.py +72 -2
  4. schemathesis/cli/commands/run/events.py +22 -2
  5. schemathesis/cli/commands/run/executor.py +35 -12
  6. schemathesis/cli/commands/run/filters.py +1 -0
  7. schemathesis/cli/commands/run/handlers/cassettes.py +27 -46
  8. schemathesis/cli/commands/run/handlers/junitxml.py +1 -1
  9. schemathesis/cli/commands/run/handlers/output.py +180 -87
  10. schemathesis/cli/commands/run/hypothesis.py +30 -19
  11. schemathesis/cli/commands/run/reports.py +72 -0
  12. schemathesis/cli/commands/run/validation.py +18 -12
  13. schemathesis/cli/ext/groups.py +42 -13
  14. schemathesis/cli/ext/options.py +15 -8
  15. schemathesis/core/errors.py +85 -9
  16. schemathesis/core/failures.py +2 -1
  17. schemathesis/core/transforms.py +1 -1
  18. schemathesis/engine/core.py +1 -1
  19. schemathesis/engine/errors.py +17 -6
  20. schemathesis/engine/phases/stateful/__init__.py +1 -0
  21. schemathesis/engine/phases/stateful/_executor.py +9 -12
  22. schemathesis/engine/phases/unit/__init__.py +2 -3
  23. schemathesis/engine/phases/unit/_executor.py +16 -13
  24. schemathesis/engine/recorder.py +22 -21
  25. schemathesis/errors.py +23 -13
  26. schemathesis/filters.py +8 -0
  27. schemathesis/generation/coverage.py +10 -5
  28. schemathesis/generation/hypothesis/builder.py +15 -12
  29. schemathesis/generation/stateful/state_machine.py +57 -12
  30. schemathesis/pytest/lazy.py +2 -3
  31. schemathesis/pytest/plugin.py +2 -3
  32. schemathesis/schemas.py +1 -1
  33. schemathesis/specs/openapi/checks.py +77 -37
  34. schemathesis/specs/openapi/expressions/__init__.py +22 -6
  35. schemathesis/specs/openapi/expressions/nodes.py +15 -21
  36. schemathesis/specs/openapi/expressions/parser.py +1 -1
  37. schemathesis/specs/openapi/parameters.py +0 -2
  38. schemathesis/specs/openapi/patterns.py +170 -2
  39. schemathesis/specs/openapi/schemas.py +67 -39
  40. schemathesis/specs/openapi/stateful/__init__.py +207 -84
  41. schemathesis/specs/openapi/stateful/control.py +87 -0
  42. schemathesis/specs/openapi/{links.py → stateful/links.py} +72 -14
  43. {schemathesis-4.0.0a2.dist-info → schemathesis-4.0.0a4.dist-info}/METADATA +1 -1
  44. {schemathesis-4.0.0a2.dist-info → schemathesis-4.0.0a4.dist-info}/RECORD +47 -45
  45. {schemathesis-4.0.0a2.dist-info → schemathesis-4.0.0a4.dist-info}/WHEEL +0 -0
  46. {schemathesis-4.0.0a2.dist-info → schemathesis-4.0.0a4.dist-info}/entry_points.txt +0 -0
  47. {schemathesis-4.0.0a2.dist-info → schemathesis-4.0.0a4.dist-info}/licenses/LICENSE +0 -0
@@ -41,11 +41,20 @@ if TYPE_CHECKING:
41
41
  from ...schemas import APIOperation
42
42
 
43
43
 
44
+ def is_unexpected_http_status_case(case: Case) -> bool:
45
+ # Skip checks for requests using HTTP methods not defined in the API spec
46
+ return bool(
47
+ case.meta
48
+ and isinstance(case.meta.phase.data, CoveragePhaseData)
49
+ and case.meta.phase.data.description.startswith("Unspecified HTTP method")
50
+ )
51
+
52
+
44
53
  @schemathesis.check
45
54
  def status_code_conformance(ctx: CheckContext, response: Response, case: Case) -> bool | None:
46
55
  from .schemas import BaseOpenAPISchema
47
56
 
48
- if not isinstance(case.operation.schema, BaseOpenAPISchema):
57
+ if not isinstance(case.operation.schema, BaseOpenAPISchema) or is_unexpected_http_status_case(case):
49
58
  return True
50
59
  responses = case.operation.definition.raw.get("responses", {})
51
60
  # "default" can be used as the default response object for all HTTP codes that are not covered individually
@@ -74,7 +83,7 @@ def _expand_responses(responses: dict[str | int, Any]) -> Generator[int, None, N
74
83
  def content_type_conformance(ctx: CheckContext, response: Response, case: Case) -> bool | None:
75
84
  from .schemas import BaseOpenAPISchema
76
85
 
77
- if not isinstance(case.operation.schema, BaseOpenAPISchema):
86
+ if not isinstance(case.operation.schema, BaseOpenAPISchema) or is_unexpected_http_status_case(case):
78
87
  return True
79
88
  documented_content_types = case.operation.schema.get_content_types(case.operation, response)
80
89
  if not documented_content_types:
@@ -128,7 +137,7 @@ def response_headers_conformance(ctx: CheckContext, response: Response, case: Ca
128
137
  from .parameters import OpenAPI20Parameter, OpenAPI30Parameter
129
138
  from .schemas import BaseOpenAPISchema, OpenApi30, _maybe_raise_one_or_more
130
139
 
131
- if not isinstance(case.operation.schema, BaseOpenAPISchema):
140
+ if not isinstance(case.operation.schema, BaseOpenAPISchema) or is_unexpected_http_status_case(case):
132
141
  return True
133
142
  resolved = case.operation.schema.get_headers(case.operation, response)
134
143
  if not resolved:
@@ -208,7 +217,7 @@ def _coerce_header_value(value: str, schema: dict[str, Any]) -> str | int | floa
208
217
  def response_schema_conformance(ctx: CheckContext, response: Response, case: Case) -> bool | None:
209
218
  from .schemas import BaseOpenAPISchema
210
219
 
211
- if not isinstance(case.operation.schema, BaseOpenAPISchema):
220
+ if not isinstance(case.operation.schema, BaseOpenAPISchema) or is_unexpected_http_status_case(case):
212
221
  return True
213
222
  return case.operation.validate_response(response)
214
223
 
@@ -217,7 +226,11 @@ def response_schema_conformance(ctx: CheckContext, response: Response, case: Cas
217
226
  def negative_data_rejection(ctx: CheckContext, response: Response, case: Case) -> bool | None:
218
227
  from .schemas import BaseOpenAPISchema
219
228
 
220
- if not isinstance(case.operation.schema, BaseOpenAPISchema) or case.meta is None:
229
+ if (
230
+ not isinstance(case.operation.schema, BaseOpenAPISchema)
231
+ or case.meta is None
232
+ or is_unexpected_http_status_case(case)
233
+ ):
221
234
  return True
222
235
 
223
236
  config = ctx.config.get(negative_data_rejection, NegativeDataRejectionConfig())
@@ -241,7 +254,11 @@ def negative_data_rejection(ctx: CheckContext, response: Response, case: Case) -
241
254
  def positive_data_acceptance(ctx: CheckContext, response: Response, case: Case) -> bool | None:
242
255
  from .schemas import BaseOpenAPISchema
243
256
 
244
- if not isinstance(case.operation.schema, BaseOpenAPISchema) or case.meta is None:
257
+ if (
258
+ not isinstance(case.operation.schema, BaseOpenAPISchema)
259
+ or case.meta is None
260
+ or is_unexpected_http_status_case(case)
261
+ ):
245
262
  return True
246
263
 
247
264
  config = ctx.config.get(positive_data_acceptance, PositiveDataAcceptanceConfig())
@@ -260,7 +277,7 @@ def positive_data_acceptance(ctx: CheckContext, response: Response, case: Case)
260
277
  def missing_required_header(ctx: CheckContext, response: Response, case: Case) -> bool | None:
261
278
  # NOTE: This check is intentionally not registered with `@schemathesis.check` because it is experimental
262
279
  meta = case.meta
263
- if meta is None or not isinstance(meta.phase.data, CoveragePhaseData):
280
+ if meta is None or not isinstance(meta.phase.data, CoveragePhaseData) or is_unexpected_http_status_case(case):
264
281
  return None
265
282
  data = meta.phase.data
266
283
  if (
@@ -332,7 +349,7 @@ def has_only_additional_properties_in_non_body_parameters(case: Case) -> bool:
332
349
  def use_after_free(ctx: CheckContext, response: Response, case: Case) -> bool | None:
333
350
  from .schemas import BaseOpenAPISchema
334
351
 
335
- if not isinstance(case.operation.schema, BaseOpenAPISchema):
352
+ if not isinstance(case.operation.schema, BaseOpenAPISchema) or is_unexpected_http_status_case(case):
336
353
  return True
337
354
  if response.status_code == 404 or response.status_code >= 500:
338
355
  return None
@@ -373,9 +390,13 @@ def use_after_free(ctx: CheckContext, response: Response, case: Case) -> bool |
373
390
  def ensure_resource_availability(ctx: CheckContext, response: Response, case: Case) -> bool | None:
374
391
  from .schemas import BaseOpenAPISchema
375
392
 
376
- if not isinstance(case.operation.schema, BaseOpenAPISchema):
393
+ if not isinstance(case.operation.schema, BaseOpenAPISchema) or is_unexpected_http_status_case(case):
377
394
  return True
378
395
 
396
+ # First, check if this is a 4XX response
397
+ if not (400 <= response.status_code < 500):
398
+ return None
399
+
379
400
  parent = ctx.find_parent(case_id=case.id)
380
401
  if parent is None:
381
402
  return None
@@ -383,6 +404,17 @@ def ensure_resource_availability(ctx: CheckContext, response: Response, case: Ca
383
404
  if parent_response is None:
384
405
  return None
385
406
 
407
+ if not (
408
+ parent.operation.method.upper() == "POST"
409
+ and 200 <= parent_response.status_code < 400
410
+ and _is_prefix_operation(
411
+ ResourcePath(parent.path, parent.path_parameters or {}),
412
+ ResourcePath(case.path, case.path_parameters or {}),
413
+ )
414
+ ):
415
+ return None
416
+
417
+ # Check if all parameters come from links
386
418
  overrides = case._override
387
419
  overrides_all_parameters = True
388
420
  for parameter in case.operation.iter_parameters():
@@ -390,34 +422,42 @@ def ensure_resource_availability(ctx: CheckContext, response: Response, case: Ca
390
422
  if parameter.name not in getattr(overrides, container, {}):
391
423
  overrides_all_parameters = False
392
424
  break
425
+ if not overrides_all_parameters:
426
+ return None
393
427
 
394
- if (
395
- # Response indicates a client error, even though all available parameters were taken from links
396
- # and comes from a POST request. This case likely means that the POST request actually did not
397
- # save the resource and it is not available for subsequent operations
398
- 400 <= response.status_code < 500
399
- and parent.operation.method.upper() == "POST"
400
- and 200 <= parent_response.status_code < 400
401
- and overrides_all_parameters
402
- and _is_prefix_operation(
403
- ResourcePath(parent.path, parent.path_parameters or {}),
404
- ResourcePath(case.path, case.path_parameters or {}),
405
- )
406
- ):
407
- created_with = parent.operation.label
408
- not_available_with = case.operation.label
409
- reason = http.client.responses.get(response.status_code, "Unknown")
410
- raise EnsureResourceAvailability(
411
- operation=created_with,
412
- message=(
413
- f"The API returned `{response.status_code} {reason}` for a resource that was just created.\n\n"
414
- f"Created with : `{created_with}`\n"
415
- f"Not available with: `{not_available_with}`"
416
- ),
417
- created_with=created_with,
418
- not_available_with=not_available_with,
419
- )
420
- return None
428
+ # Look for any successful DELETE operations on this resource
429
+ for related_case in ctx.find_related(case_id=case.id):
430
+ related_response = ctx.find_response(case_id=related_case.id)
431
+ if (
432
+ related_case.operation.method.upper() == "DELETE"
433
+ and related_response is not None
434
+ and 200 <= related_response.status_code < 300
435
+ and _is_prefix_operation(
436
+ ResourcePath(related_case.path, related_case.path_parameters or {}),
437
+ ResourcePath(case.path, case.path_parameters or {}),
438
+ )
439
+ ):
440
+ # Resource was properly deleted, 404 is expected
441
+ return None
442
+
443
+ # If we got here:
444
+ # 1. Resource was created successfully
445
+ # 2. Current operation returned 4XX
446
+ # 3. All parameters come from links
447
+ # 4. No successful DELETE operations found
448
+ created_with = parent.operation.label
449
+ not_available_with = case.operation.label
450
+ reason = http.client.responses.get(response.status_code, "Unknown")
451
+ raise EnsureResourceAvailability(
452
+ operation=created_with,
453
+ message=(
454
+ f"The API returned `{response.status_code} {reason}` for a resource that was just created.\n\n"
455
+ f"Created with : `{created_with}`\n"
456
+ f"Not available with: `{not_available_with}`"
457
+ ),
458
+ created_with=created_with,
459
+ not_available_with=not_available_with,
460
+ )
421
461
 
422
462
 
423
463
  class AuthKind(enum.Enum):
@@ -430,7 +470,7 @@ def ignored_auth(ctx: CheckContext, response: Response, case: Case) -> bool | No
430
470
  """Check if an operation declares authentication as a requirement but does not actually enforce it."""
431
471
  from .schemas import BaseOpenAPISchema
432
472
 
433
- if not isinstance(case.operation.schema, BaseOpenAPISchema):
473
+ if not isinstance(case.operation.schema, BaseOpenAPISchema) or is_unexpected_http_status_case(case):
434
474
  return True
435
475
  security_parameters = _get_security_parameters(case.operation)
436
476
  # Authentication is required for this API operation and response is successful
@@ -8,6 +8,7 @@ from __future__ import annotations
8
8
  import json
9
9
  from typing import Any
10
10
 
11
+ from schemathesis.core.transforms import UNRESOLVABLE, Unresolvable
11
12
  from schemathesis.generation.stateful.state_machine import StepOutput
12
13
 
13
14
  from . import lexer, nodes, parser
@@ -25,21 +26,36 @@ def evaluate(expr: Any, output: StepOutput, evaluate_nested: bool = False) -> An
25
26
  parts = [node.evaluate(output) for node in parser.parse(expr)]
26
27
  if len(parts) == 1:
27
28
  return parts[0] # keep the return type the same as the internal value type
28
- # otherwise, concatenate into a string
29
+ if any(isinstance(part, Unresolvable) for part in parts):
30
+ return UNRESOLVABLE
29
31
  return "".join(str(part) for part in parts if part is not None)
30
32
 
31
33
 
32
34
  def _evaluate_nested(expr: dict[str, Any] | list, output: StepOutput) -> Any:
33
35
  if isinstance(expr, dict):
34
- return {
35
- _evaluate_object_key(key, output): evaluate(value, output, evaluate_nested=True)
36
- for key, value in expr.items()
37
- }
38
- return [evaluate(item, output, evaluate_nested=True) for item in expr]
36
+ result_dict = {}
37
+ for key, value in expr.items():
38
+ new_key = _evaluate_object_key(key, output)
39
+ if new_key is UNRESOLVABLE:
40
+ return new_key
41
+ new_value = evaluate(value, output, evaluate_nested=True)
42
+ if new_value is UNRESOLVABLE:
43
+ return new_value
44
+ result_dict[new_key] = new_value
45
+ return result_dict
46
+ result_list = []
47
+ for item in expr:
48
+ new_value = evaluate(item, output, evaluate_nested=True)
49
+ if new_value is UNRESOLVABLE:
50
+ return new_value
51
+ result_list.append(new_value)
52
+ return result_list
39
53
 
40
54
 
41
55
  def _evaluate_object_key(key: str, output: StepOutput) -> Any:
42
56
  evaluated = evaluate(key, output)
57
+ if evaluated is UNRESOLVABLE:
58
+ return evaluated
43
59
  if isinstance(evaluated, str):
44
60
  return evaluated
45
61
  if isinstance(evaluated, bool):
@@ -8,7 +8,7 @@ from typing import TYPE_CHECKING, Any, cast
8
8
 
9
9
  from requests.structures import CaseInsensitiveDict
10
10
 
11
- from schemathesis.core.transforms import UNRESOLVABLE, resolve_pointer
11
+ from schemathesis.core.transforms import UNRESOLVABLE, Unresolvable, resolve_pointer
12
12
  from schemathesis.generation.stateful.state_machine import StepOutput
13
13
  from schemathesis.transport.requests import REQUESTS_TRANSPORT
14
14
 
@@ -20,7 +20,7 @@ if TYPE_CHECKING:
20
20
  class Node:
21
21
  """Generic expression node."""
22
22
 
23
- def evaluate(self, output: StepOutput) -> str:
23
+ def evaluate(self, output: StepOutput) -> str | Unresolvable:
24
24
  raise NotImplementedError
25
25
 
26
26
 
@@ -39,7 +39,7 @@ class String(Node):
39
39
 
40
40
  value: str
41
41
 
42
- def evaluate(self, output: StepOutput) -> str:
42
+ def evaluate(self, output: StepOutput) -> str | Unresolvable:
43
43
  """String tokens are passed as they are.
44
44
 
45
45
  ``foo{$request.path.id}``
@@ -53,7 +53,7 @@ class String(Node):
53
53
  class URL(Node):
54
54
  """A node for `$url` expression."""
55
55
 
56
- def evaluate(self, output: StepOutput) -> str:
56
+ def evaluate(self, output: StepOutput) -> str | Unresolvable:
57
57
  import requests
58
58
 
59
59
  base_url = output.case.operation.base_url or "http://127.0.0.1"
@@ -66,7 +66,7 @@ class URL(Node):
66
66
  class Method(Node):
67
67
  """A node for `$method` expression."""
68
68
 
69
- def evaluate(self, output: StepOutput) -> str:
69
+ def evaluate(self, output: StepOutput) -> str | Unresolvable:
70
70
  return output.case.operation.method.upper()
71
71
 
72
72
 
@@ -74,7 +74,7 @@ class Method(Node):
74
74
  class StatusCode(Node):
75
75
  """A node for `$statusCode` expression."""
76
76
 
77
- def evaluate(self, output: StepOutput) -> str:
77
+ def evaluate(self, output: StepOutput) -> str | Unresolvable:
78
78
  return str(output.response.status_code)
79
79
 
80
80
 
@@ -86,7 +86,7 @@ class NonBodyRequest(Node):
86
86
  parameter: str
87
87
  extractor: Extractor | None = None
88
88
 
89
- def evaluate(self, output: StepOutput) -> str:
89
+ def evaluate(self, output: StepOutput) -> str | Unresolvable:
90
90
  container: dict | CaseInsensitiveDict = {
91
91
  "query": output.case.query,
92
92
  "path": output.case.path_parameters,
@@ -96,9 +96,9 @@ class NonBodyRequest(Node):
96
96
  container = CaseInsensitiveDict(container)
97
97
  value = container.get(self.parameter)
98
98
  if value is None:
99
- return ""
99
+ return UNRESOLVABLE
100
100
  if self.extractor is not None:
101
- return self.extractor.extract(value) or ""
101
+ return self.extractor.extract(value) or UNRESOLVABLE
102
102
  return value
103
103
 
104
104
 
@@ -108,14 +108,11 @@ class BodyRequest(Node):
108
108
 
109
109
  pointer: str | None = None
110
110
 
111
- def evaluate(self, output: StepOutput) -> Any:
111
+ def evaluate(self, output: StepOutput) -> Any | Unresolvable:
112
112
  document = output.case.body
113
113
  if self.pointer is None:
114
114
  return document
115
- resolved = resolve_pointer(document, self.pointer[1:])
116
- if resolved is UNRESOLVABLE:
117
- return None
118
- return resolved
115
+ return resolve_pointer(document, self.pointer[1:])
119
116
 
120
117
 
121
118
  @dataclass
@@ -125,12 +122,12 @@ class HeaderResponse(Node):
125
122
  parameter: str
126
123
  extractor: Extractor | None = None
127
124
 
128
- def evaluate(self, output: StepOutput) -> str:
125
+ def evaluate(self, output: StepOutput) -> str | Unresolvable:
129
126
  value = output.response.headers.get(self.parameter.lower())
130
127
  if value is None:
131
- return ""
128
+ return UNRESOLVABLE
132
129
  if self.extractor is not None:
133
- return self.extractor.extract(value[0]) or ""
130
+ return self.extractor.extract(value[0]) or UNRESOLVABLE
134
131
  return value[0]
135
132
 
136
133
 
@@ -145,7 +142,4 @@ class BodyResponse(Node):
145
142
  if self.pointer is None:
146
143
  # We need the parsed document - data will be serialized before sending to the application
147
144
  return document
148
- resolved = resolve_pointer(document, self.pointer[1:])
149
- if resolved is UNRESOLVABLE:
150
- return None
151
- return resolved
145
+ return resolve_pointer(document, self.pointer[1:])
@@ -46,7 +46,7 @@ def _parse_variable(tokens: lexer.TokenGenerator, token: lexer.Token, expr: str)
46
46
  elif token.value == nodes.NodeType.RESPONSE.value:
47
47
  yield _parse_response(tokens, expr)
48
48
  else:
49
- raise UnknownToken(token.value)
49
+ raise UnknownToken(f"Invalid expression `{expr}`. Unknown token: `{token.value}`")
50
50
 
51
51
 
52
52
  def _parse_request(tokens: lexer.TokenGenerator, expr: str) -> nodes.BodyRequest | nodes.NonBodyRequest:
@@ -376,7 +376,6 @@ def get_parameter_schema(operation: APIOperation, data: dict[str, Any]) -> dict[
376
376
  ),
377
377
  path=operation.path,
378
378
  method=operation.method,
379
- full_path=operation.full_path,
380
379
  )
381
380
  return data["schema"]
382
381
  # https://github.com/OAI/OpenAPI-Specification/blob/master/versions/3.0.3.md#fixed-fields-10
@@ -388,7 +387,6 @@ def get_parameter_schema(operation: APIOperation, data: dict[str, Any]) -> dict[
388
387
  MISSING_SCHEMA_OR_CONTENT_MESSAGE.format(location=data.get("in", ""), name=data.get("name", "<UNKNOWN>")),
389
388
  path=operation.path,
390
389
  method=operation.method,
391
- full_path=operation.full_path,
392
390
  ) from exc
393
391
  options = iter(content.values())
394
392
  media_type_object = next(options)
@@ -66,9 +66,177 @@ def _handle_parsed_pattern(parsed: list, pattern: str, min_length: int | None, m
66
66
  )
67
67
  + trailing_anchor
68
68
  )
69
+ elif (
70
+ len(parsed) > 3
71
+ and parsed[0][0] == ANCHOR
72
+ and parsed[-1][0] == ANCHOR
73
+ and all(op == LITERAL or op in REPEATS for op, _ in parsed[1:-1])
74
+ ):
75
+ return _handle_anchored_pattern(parsed, pattern, min_length, max_length)
69
76
  return pattern
70
77
 
71
78
 
79
+ def _handle_anchored_pattern(parsed: list, pattern: str, min_length: int | None, max_length: int | None) -> str:
80
+ """Update regex pattern with multiple quantified patterns to satisfy length constraints."""
81
+ # Extract anchors
82
+ leading_anchor_length = _get_anchor_length(parsed[0][1])
83
+ trailing_anchor_length = _get_anchor_length(parsed[-1][1])
84
+ leading_anchor = pattern[:leading_anchor_length]
85
+ trailing_anchor = pattern[-trailing_anchor_length:]
86
+
87
+ pattern_parts = parsed[1:-1]
88
+
89
+ # Adjust length constraints by subtracting fixed literals length
90
+ fixed_length = sum(1 for op, _ in pattern_parts if op == LITERAL)
91
+ if min_length is not None:
92
+ min_length -= fixed_length
93
+ if min_length < 0:
94
+ return pattern
95
+ if max_length is not None:
96
+ max_length -= fixed_length
97
+ if max_length < 0:
98
+ return pattern
99
+
100
+ # Extract only min/max bounds from quantified parts
101
+ quantifier_bounds = [value[:2] for op, value in pattern_parts if op in REPEATS]
102
+
103
+ if not quantifier_bounds:
104
+ return pattern
105
+
106
+ length_distribution = _distribute_length_constraints(quantifier_bounds, min_length, max_length)
107
+ if not length_distribution:
108
+ return pattern
109
+
110
+ # Rebuild pattern with updated quantifiers
111
+ result = leading_anchor
112
+ current_position = leading_anchor_length
113
+ distribution_idx = 0
114
+
115
+ for op, value in pattern_parts:
116
+ if op == LITERAL:
117
+ if pattern[current_position] == "\\":
118
+ # Escaped value
119
+ current_position += 2
120
+ result += "\\"
121
+ else:
122
+ current_position += 1
123
+ result += chr(value)
124
+ else:
125
+ new_min, new_max = length_distribution[distribution_idx]
126
+ next_position = _find_quantified_end(pattern, current_position)
127
+ quantified_segment = pattern[current_position:next_position]
128
+ _, _, subpattern = value
129
+ new_value = (new_min, new_max, subpattern)
130
+
131
+ result += _update_quantifier(op, new_value, quantified_segment, new_min, new_max)
132
+ current_position = next_position
133
+ distribution_idx += 1
134
+
135
+ return result + trailing_anchor
136
+
137
+
138
+ def _find_quantified_end(pattern: str, start: int) -> int:
139
+ """Find the end position of current quantified part."""
140
+ char_class_level = 0
141
+ group_level = 0
142
+
143
+ for i in range(start, len(pattern)):
144
+ char = pattern[i]
145
+
146
+ # Handle character class nesting
147
+ if char == "[":
148
+ char_class_level += 1
149
+ elif char == "]":
150
+ char_class_level -= 1
151
+
152
+ # Handle group nesting
153
+ elif char == "(":
154
+ group_level += 1
155
+ elif char == ")":
156
+ group_level -= 1
157
+
158
+ # Only process quantifiers when we're not inside any nested structure
159
+ elif char_class_level == 0 and group_level == 0:
160
+ if char in "*+?":
161
+ return i + 1
162
+ elif char == "{":
163
+ # Find matching }
164
+ while i < len(pattern) and pattern[i] != "}":
165
+ i += 1
166
+ return i + 1
167
+
168
+ return len(pattern)
169
+
170
+
171
+ def _distribute_length_constraints(
172
+ bounds: list[tuple[int, int]], min_length: int | None, max_length: int | None
173
+ ) -> list[tuple[int, int]] | None:
174
+ """Distribute length constraints among quantified pattern parts."""
175
+ # Handle exact length case with dynamic programming
176
+ if min_length == max_length:
177
+ assert min_length is not None
178
+ target = min_length
179
+ dp: dict[tuple[int, int], list[tuple[int, ...]] | None] = {}
180
+
181
+ def find_valid_combination(pos: int, remaining: int) -> list[tuple[int, ...]] | None:
182
+ if (pos, remaining) in dp:
183
+ return dp[(pos, remaining)]
184
+
185
+ if pos == len(bounds):
186
+ return [()] if remaining == 0 else None
187
+
188
+ max_len: int
189
+ min_len, max_len = bounds[pos]
190
+ if max_len == MAXREPEAT:
191
+ max_len = remaining + 1
192
+ else:
193
+ max_len += 1
194
+
195
+ # Try each possible length for current quantifier
196
+ for length in range(min_len, max_len):
197
+ rest = find_valid_combination(pos + 1, remaining - length)
198
+ if rest is not None:
199
+ dp[(pos, remaining)] = [(length,) + r for r in rest]
200
+ return dp[(pos, remaining)]
201
+
202
+ dp[(pos, remaining)] = None
203
+ return None
204
+
205
+ distribution = find_valid_combination(0, target)
206
+ if distribution:
207
+ return [(length, length) for length in distribution[0]]
208
+ return None
209
+
210
+ # Handle range case by distributing min/max bounds
211
+ result = []
212
+ remaining_min = min_length or 0
213
+ remaining_max = max_length or MAXREPEAT
214
+
215
+ for min_repeat, max_repeat in bounds:
216
+ if remaining_min > 0:
217
+ part_min = min(max_repeat, max(min_repeat, remaining_min))
218
+ else:
219
+ part_min = min_repeat
220
+
221
+ if remaining_max < MAXREPEAT:
222
+ part_max = min(max_repeat, remaining_max)
223
+ else:
224
+ part_max = max_repeat
225
+
226
+ if part_min > part_max:
227
+ return None
228
+
229
+ result.append((part_min, part_max))
230
+
231
+ remaining_min = max(0, remaining_min - part_min)
232
+ remaining_max -= part_max if part_max != MAXREPEAT else 0
233
+
234
+ if remaining_min > 0 or remaining_max < 0:
235
+ return None
236
+
237
+ return result
238
+
239
+
72
240
  def _get_anchor_length(node_type: int) -> int:
73
241
  """Determine the length of the anchor based on its type."""
74
242
  if node_type in {sre.AT_BEGINNING_STRING, sre.AT_END_STRING, sre.AT_BOUNDARY, sre.AT_NON_BOUNDARY}:
@@ -93,13 +261,13 @@ def _handle_repeat_quantifier(
93
261
  min_length, max_length = _build_size(min_repeat, max_repeat, min_length, max_length)
94
262
  if min_length > max_length:
95
263
  return pattern
96
- return f"({_strip_quantifier(pattern)})" + _build_quantifier(min_length, max_length)
264
+ return f"({_strip_quantifier(pattern).strip(')(')})" + _build_quantifier(min_length, max_length)
97
265
 
98
266
 
99
267
  def _handle_literal_or_in_quantifier(pattern: str, min_length: int | None, max_length: int | None) -> str:
100
268
  """Handle literal or character class quantifiers."""
101
269
  min_length = 1 if min_length is None else max(min_length, 1)
102
- return f"({pattern})" + _build_quantifier(min_length, max_length)
270
+ return f"({pattern.strip(')(')})" + _build_quantifier(min_length, max_length)
103
271
 
104
272
 
105
273
  def _build_quantifier(minimum: int | None, maximum: int | None) -> str: