schemathesis 4.0.0a2__py3-none-any.whl → 4.0.0a4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- schemathesis/cli/__init__.py +15 -4
- schemathesis/cli/commands/run/__init__.py +148 -94
- schemathesis/cli/commands/run/context.py +72 -2
- schemathesis/cli/commands/run/events.py +22 -2
- schemathesis/cli/commands/run/executor.py +35 -12
- schemathesis/cli/commands/run/filters.py +1 -0
- schemathesis/cli/commands/run/handlers/cassettes.py +27 -46
- schemathesis/cli/commands/run/handlers/junitxml.py +1 -1
- schemathesis/cli/commands/run/handlers/output.py +180 -87
- schemathesis/cli/commands/run/hypothesis.py +30 -19
- schemathesis/cli/commands/run/reports.py +72 -0
- schemathesis/cli/commands/run/validation.py +18 -12
- schemathesis/cli/ext/groups.py +42 -13
- schemathesis/cli/ext/options.py +15 -8
- schemathesis/core/errors.py +85 -9
- schemathesis/core/failures.py +2 -1
- schemathesis/core/transforms.py +1 -1
- schemathesis/engine/core.py +1 -1
- schemathesis/engine/errors.py +17 -6
- schemathesis/engine/phases/stateful/__init__.py +1 -0
- schemathesis/engine/phases/stateful/_executor.py +9 -12
- schemathesis/engine/phases/unit/__init__.py +2 -3
- schemathesis/engine/phases/unit/_executor.py +16 -13
- schemathesis/engine/recorder.py +22 -21
- schemathesis/errors.py +23 -13
- schemathesis/filters.py +8 -0
- schemathesis/generation/coverage.py +10 -5
- schemathesis/generation/hypothesis/builder.py +15 -12
- schemathesis/generation/stateful/state_machine.py +57 -12
- schemathesis/pytest/lazy.py +2 -3
- schemathesis/pytest/plugin.py +2 -3
- schemathesis/schemas.py +1 -1
- schemathesis/specs/openapi/checks.py +77 -37
- schemathesis/specs/openapi/expressions/__init__.py +22 -6
- schemathesis/specs/openapi/expressions/nodes.py +15 -21
- schemathesis/specs/openapi/expressions/parser.py +1 -1
- schemathesis/specs/openapi/parameters.py +0 -2
- schemathesis/specs/openapi/patterns.py +170 -2
- schemathesis/specs/openapi/schemas.py +67 -39
- schemathesis/specs/openapi/stateful/__init__.py +207 -84
- schemathesis/specs/openapi/stateful/control.py +87 -0
- schemathesis/specs/openapi/{links.py → stateful/links.py} +72 -14
- {schemathesis-4.0.0a2.dist-info → schemathesis-4.0.0a4.dist-info}/METADATA +1 -1
- {schemathesis-4.0.0a2.dist-info → schemathesis-4.0.0a4.dist-info}/RECORD +47 -45
- {schemathesis-4.0.0a2.dist-info → schemathesis-4.0.0a4.dist-info}/WHEEL +0 -0
- {schemathesis-4.0.0a2.dist-info → schemathesis-4.0.0a4.dist-info}/entry_points.txt +0 -0
- {schemathesis-4.0.0a2.dist-info → schemathesis-4.0.0a4.dist-info}/licenses/LICENSE +0 -0
@@ -41,11 +41,20 @@ if TYPE_CHECKING:
|
|
41
41
|
from ...schemas import APIOperation
|
42
42
|
|
43
43
|
|
44
|
+
def is_unexpected_http_status_case(case: Case) -> bool:
|
45
|
+
# Skip checks for requests using HTTP methods not defined in the API spec
|
46
|
+
return bool(
|
47
|
+
case.meta
|
48
|
+
and isinstance(case.meta.phase.data, CoveragePhaseData)
|
49
|
+
and case.meta.phase.data.description.startswith("Unspecified HTTP method")
|
50
|
+
)
|
51
|
+
|
52
|
+
|
44
53
|
@schemathesis.check
|
45
54
|
def status_code_conformance(ctx: CheckContext, response: Response, case: Case) -> bool | None:
|
46
55
|
from .schemas import BaseOpenAPISchema
|
47
56
|
|
48
|
-
if not isinstance(case.operation.schema, BaseOpenAPISchema):
|
57
|
+
if not isinstance(case.operation.schema, BaseOpenAPISchema) or is_unexpected_http_status_case(case):
|
49
58
|
return True
|
50
59
|
responses = case.operation.definition.raw.get("responses", {})
|
51
60
|
# "default" can be used as the default response object for all HTTP codes that are not covered individually
|
@@ -74,7 +83,7 @@ def _expand_responses(responses: dict[str | int, Any]) -> Generator[int, None, N
|
|
74
83
|
def content_type_conformance(ctx: CheckContext, response: Response, case: Case) -> bool | None:
|
75
84
|
from .schemas import BaseOpenAPISchema
|
76
85
|
|
77
|
-
if not isinstance(case.operation.schema, BaseOpenAPISchema):
|
86
|
+
if not isinstance(case.operation.schema, BaseOpenAPISchema) or is_unexpected_http_status_case(case):
|
78
87
|
return True
|
79
88
|
documented_content_types = case.operation.schema.get_content_types(case.operation, response)
|
80
89
|
if not documented_content_types:
|
@@ -128,7 +137,7 @@ def response_headers_conformance(ctx: CheckContext, response: Response, case: Ca
|
|
128
137
|
from .parameters import OpenAPI20Parameter, OpenAPI30Parameter
|
129
138
|
from .schemas import BaseOpenAPISchema, OpenApi30, _maybe_raise_one_or_more
|
130
139
|
|
131
|
-
if not isinstance(case.operation.schema, BaseOpenAPISchema):
|
140
|
+
if not isinstance(case.operation.schema, BaseOpenAPISchema) or is_unexpected_http_status_case(case):
|
132
141
|
return True
|
133
142
|
resolved = case.operation.schema.get_headers(case.operation, response)
|
134
143
|
if not resolved:
|
@@ -208,7 +217,7 @@ def _coerce_header_value(value: str, schema: dict[str, Any]) -> str | int | floa
|
|
208
217
|
def response_schema_conformance(ctx: CheckContext, response: Response, case: Case) -> bool | None:
|
209
218
|
from .schemas import BaseOpenAPISchema
|
210
219
|
|
211
|
-
if not isinstance(case.operation.schema, BaseOpenAPISchema):
|
220
|
+
if not isinstance(case.operation.schema, BaseOpenAPISchema) or is_unexpected_http_status_case(case):
|
212
221
|
return True
|
213
222
|
return case.operation.validate_response(response)
|
214
223
|
|
@@ -217,7 +226,11 @@ def response_schema_conformance(ctx: CheckContext, response: Response, case: Cas
|
|
217
226
|
def negative_data_rejection(ctx: CheckContext, response: Response, case: Case) -> bool | None:
|
218
227
|
from .schemas import BaseOpenAPISchema
|
219
228
|
|
220
|
-
if
|
229
|
+
if (
|
230
|
+
not isinstance(case.operation.schema, BaseOpenAPISchema)
|
231
|
+
or case.meta is None
|
232
|
+
or is_unexpected_http_status_case(case)
|
233
|
+
):
|
221
234
|
return True
|
222
235
|
|
223
236
|
config = ctx.config.get(negative_data_rejection, NegativeDataRejectionConfig())
|
@@ -241,7 +254,11 @@ def negative_data_rejection(ctx: CheckContext, response: Response, case: Case) -
|
|
241
254
|
def positive_data_acceptance(ctx: CheckContext, response: Response, case: Case) -> bool | None:
|
242
255
|
from .schemas import BaseOpenAPISchema
|
243
256
|
|
244
|
-
if
|
257
|
+
if (
|
258
|
+
not isinstance(case.operation.schema, BaseOpenAPISchema)
|
259
|
+
or case.meta is None
|
260
|
+
or is_unexpected_http_status_case(case)
|
261
|
+
):
|
245
262
|
return True
|
246
263
|
|
247
264
|
config = ctx.config.get(positive_data_acceptance, PositiveDataAcceptanceConfig())
|
@@ -260,7 +277,7 @@ def positive_data_acceptance(ctx: CheckContext, response: Response, case: Case)
|
|
260
277
|
def missing_required_header(ctx: CheckContext, response: Response, case: Case) -> bool | None:
|
261
278
|
# NOTE: This check is intentionally not registered with `@schemathesis.check` because it is experimental
|
262
279
|
meta = case.meta
|
263
|
-
if meta is None or not isinstance(meta.phase.data, CoveragePhaseData):
|
280
|
+
if meta is None or not isinstance(meta.phase.data, CoveragePhaseData) or is_unexpected_http_status_case(case):
|
264
281
|
return None
|
265
282
|
data = meta.phase.data
|
266
283
|
if (
|
@@ -332,7 +349,7 @@ def has_only_additional_properties_in_non_body_parameters(case: Case) -> bool:
|
|
332
349
|
def use_after_free(ctx: CheckContext, response: Response, case: Case) -> bool | None:
|
333
350
|
from .schemas import BaseOpenAPISchema
|
334
351
|
|
335
|
-
if not isinstance(case.operation.schema, BaseOpenAPISchema):
|
352
|
+
if not isinstance(case.operation.schema, BaseOpenAPISchema) or is_unexpected_http_status_case(case):
|
336
353
|
return True
|
337
354
|
if response.status_code == 404 or response.status_code >= 500:
|
338
355
|
return None
|
@@ -373,9 +390,13 @@ def use_after_free(ctx: CheckContext, response: Response, case: Case) -> bool |
|
|
373
390
|
def ensure_resource_availability(ctx: CheckContext, response: Response, case: Case) -> bool | None:
|
374
391
|
from .schemas import BaseOpenAPISchema
|
375
392
|
|
376
|
-
if not isinstance(case.operation.schema, BaseOpenAPISchema):
|
393
|
+
if not isinstance(case.operation.schema, BaseOpenAPISchema) or is_unexpected_http_status_case(case):
|
377
394
|
return True
|
378
395
|
|
396
|
+
# First, check if this is a 4XX response
|
397
|
+
if not (400 <= response.status_code < 500):
|
398
|
+
return None
|
399
|
+
|
379
400
|
parent = ctx.find_parent(case_id=case.id)
|
380
401
|
if parent is None:
|
381
402
|
return None
|
@@ -383,6 +404,17 @@ def ensure_resource_availability(ctx: CheckContext, response: Response, case: Ca
|
|
383
404
|
if parent_response is None:
|
384
405
|
return None
|
385
406
|
|
407
|
+
if not (
|
408
|
+
parent.operation.method.upper() == "POST"
|
409
|
+
and 200 <= parent_response.status_code < 400
|
410
|
+
and _is_prefix_operation(
|
411
|
+
ResourcePath(parent.path, parent.path_parameters or {}),
|
412
|
+
ResourcePath(case.path, case.path_parameters or {}),
|
413
|
+
)
|
414
|
+
):
|
415
|
+
return None
|
416
|
+
|
417
|
+
# Check if all parameters come from links
|
386
418
|
overrides = case._override
|
387
419
|
overrides_all_parameters = True
|
388
420
|
for parameter in case.operation.iter_parameters():
|
@@ -390,34 +422,42 @@ def ensure_resource_availability(ctx: CheckContext, response: Response, case: Ca
|
|
390
422
|
if parameter.name not in getattr(overrides, container, {}):
|
391
423
|
overrides_all_parameters = False
|
392
424
|
break
|
425
|
+
if not overrides_all_parameters:
|
426
|
+
return None
|
393
427
|
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
)
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
428
|
+
# Look for any successful DELETE operations on this resource
|
429
|
+
for related_case in ctx.find_related(case_id=case.id):
|
430
|
+
related_response = ctx.find_response(case_id=related_case.id)
|
431
|
+
if (
|
432
|
+
related_case.operation.method.upper() == "DELETE"
|
433
|
+
and related_response is not None
|
434
|
+
and 200 <= related_response.status_code < 300
|
435
|
+
and _is_prefix_operation(
|
436
|
+
ResourcePath(related_case.path, related_case.path_parameters or {}),
|
437
|
+
ResourcePath(case.path, case.path_parameters or {}),
|
438
|
+
)
|
439
|
+
):
|
440
|
+
# Resource was properly deleted, 404 is expected
|
441
|
+
return None
|
442
|
+
|
443
|
+
# If we got here:
|
444
|
+
# 1. Resource was created successfully
|
445
|
+
# 2. Current operation returned 4XX
|
446
|
+
# 3. All parameters come from links
|
447
|
+
# 4. No successful DELETE operations found
|
448
|
+
created_with = parent.operation.label
|
449
|
+
not_available_with = case.operation.label
|
450
|
+
reason = http.client.responses.get(response.status_code, "Unknown")
|
451
|
+
raise EnsureResourceAvailability(
|
452
|
+
operation=created_with,
|
453
|
+
message=(
|
454
|
+
f"The API returned `{response.status_code} {reason}` for a resource that was just created.\n\n"
|
455
|
+
f"Created with : `{created_with}`\n"
|
456
|
+
f"Not available with: `{not_available_with}`"
|
457
|
+
),
|
458
|
+
created_with=created_with,
|
459
|
+
not_available_with=not_available_with,
|
460
|
+
)
|
421
461
|
|
422
462
|
|
423
463
|
class AuthKind(enum.Enum):
|
@@ -430,7 +470,7 @@ def ignored_auth(ctx: CheckContext, response: Response, case: Case) -> bool | No
|
|
430
470
|
"""Check if an operation declares authentication as a requirement but does not actually enforce it."""
|
431
471
|
from .schemas import BaseOpenAPISchema
|
432
472
|
|
433
|
-
if not isinstance(case.operation.schema, BaseOpenAPISchema):
|
473
|
+
if not isinstance(case.operation.schema, BaseOpenAPISchema) or is_unexpected_http_status_case(case):
|
434
474
|
return True
|
435
475
|
security_parameters = _get_security_parameters(case.operation)
|
436
476
|
# Authentication is required for this API operation and response is successful
|
@@ -8,6 +8,7 @@ from __future__ import annotations
|
|
8
8
|
import json
|
9
9
|
from typing import Any
|
10
10
|
|
11
|
+
from schemathesis.core.transforms import UNRESOLVABLE, Unresolvable
|
11
12
|
from schemathesis.generation.stateful.state_machine import StepOutput
|
12
13
|
|
13
14
|
from . import lexer, nodes, parser
|
@@ -25,21 +26,36 @@ def evaluate(expr: Any, output: StepOutput, evaluate_nested: bool = False) -> An
|
|
25
26
|
parts = [node.evaluate(output) for node in parser.parse(expr)]
|
26
27
|
if len(parts) == 1:
|
27
28
|
return parts[0] # keep the return type the same as the internal value type
|
28
|
-
|
29
|
+
if any(isinstance(part, Unresolvable) for part in parts):
|
30
|
+
return UNRESOLVABLE
|
29
31
|
return "".join(str(part) for part in parts if part is not None)
|
30
32
|
|
31
33
|
|
32
34
|
def _evaluate_nested(expr: dict[str, Any] | list, output: StepOutput) -> Any:
|
33
35
|
if isinstance(expr, dict):
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
36
|
+
result_dict = {}
|
37
|
+
for key, value in expr.items():
|
38
|
+
new_key = _evaluate_object_key(key, output)
|
39
|
+
if new_key is UNRESOLVABLE:
|
40
|
+
return new_key
|
41
|
+
new_value = evaluate(value, output, evaluate_nested=True)
|
42
|
+
if new_value is UNRESOLVABLE:
|
43
|
+
return new_value
|
44
|
+
result_dict[new_key] = new_value
|
45
|
+
return result_dict
|
46
|
+
result_list = []
|
47
|
+
for item in expr:
|
48
|
+
new_value = evaluate(item, output, evaluate_nested=True)
|
49
|
+
if new_value is UNRESOLVABLE:
|
50
|
+
return new_value
|
51
|
+
result_list.append(new_value)
|
52
|
+
return result_list
|
39
53
|
|
40
54
|
|
41
55
|
def _evaluate_object_key(key: str, output: StepOutput) -> Any:
|
42
56
|
evaluated = evaluate(key, output)
|
57
|
+
if evaluated is UNRESOLVABLE:
|
58
|
+
return evaluated
|
43
59
|
if isinstance(evaluated, str):
|
44
60
|
return evaluated
|
45
61
|
if isinstance(evaluated, bool):
|
@@ -8,7 +8,7 @@ from typing import TYPE_CHECKING, Any, cast
|
|
8
8
|
|
9
9
|
from requests.structures import CaseInsensitiveDict
|
10
10
|
|
11
|
-
from schemathesis.core.transforms import UNRESOLVABLE, resolve_pointer
|
11
|
+
from schemathesis.core.transforms import UNRESOLVABLE, Unresolvable, resolve_pointer
|
12
12
|
from schemathesis.generation.stateful.state_machine import StepOutput
|
13
13
|
from schemathesis.transport.requests import REQUESTS_TRANSPORT
|
14
14
|
|
@@ -20,7 +20,7 @@ if TYPE_CHECKING:
|
|
20
20
|
class Node:
|
21
21
|
"""Generic expression node."""
|
22
22
|
|
23
|
-
def evaluate(self, output: StepOutput) -> str:
|
23
|
+
def evaluate(self, output: StepOutput) -> str | Unresolvable:
|
24
24
|
raise NotImplementedError
|
25
25
|
|
26
26
|
|
@@ -39,7 +39,7 @@ class String(Node):
|
|
39
39
|
|
40
40
|
value: str
|
41
41
|
|
42
|
-
def evaluate(self, output: StepOutput) -> str:
|
42
|
+
def evaluate(self, output: StepOutput) -> str | Unresolvable:
|
43
43
|
"""String tokens are passed as they are.
|
44
44
|
|
45
45
|
``foo{$request.path.id}``
|
@@ -53,7 +53,7 @@ class String(Node):
|
|
53
53
|
class URL(Node):
|
54
54
|
"""A node for `$url` expression."""
|
55
55
|
|
56
|
-
def evaluate(self, output: StepOutput) -> str:
|
56
|
+
def evaluate(self, output: StepOutput) -> str | Unresolvable:
|
57
57
|
import requests
|
58
58
|
|
59
59
|
base_url = output.case.operation.base_url or "http://127.0.0.1"
|
@@ -66,7 +66,7 @@ class URL(Node):
|
|
66
66
|
class Method(Node):
|
67
67
|
"""A node for `$method` expression."""
|
68
68
|
|
69
|
-
def evaluate(self, output: StepOutput) -> str:
|
69
|
+
def evaluate(self, output: StepOutput) -> str | Unresolvable:
|
70
70
|
return output.case.operation.method.upper()
|
71
71
|
|
72
72
|
|
@@ -74,7 +74,7 @@ class Method(Node):
|
|
74
74
|
class StatusCode(Node):
|
75
75
|
"""A node for `$statusCode` expression."""
|
76
76
|
|
77
|
-
def evaluate(self, output: StepOutput) -> str:
|
77
|
+
def evaluate(self, output: StepOutput) -> str | Unresolvable:
|
78
78
|
return str(output.response.status_code)
|
79
79
|
|
80
80
|
|
@@ -86,7 +86,7 @@ class NonBodyRequest(Node):
|
|
86
86
|
parameter: str
|
87
87
|
extractor: Extractor | None = None
|
88
88
|
|
89
|
-
def evaluate(self, output: StepOutput) -> str:
|
89
|
+
def evaluate(self, output: StepOutput) -> str | Unresolvable:
|
90
90
|
container: dict | CaseInsensitiveDict = {
|
91
91
|
"query": output.case.query,
|
92
92
|
"path": output.case.path_parameters,
|
@@ -96,9 +96,9 @@ class NonBodyRequest(Node):
|
|
96
96
|
container = CaseInsensitiveDict(container)
|
97
97
|
value = container.get(self.parameter)
|
98
98
|
if value is None:
|
99
|
-
return
|
99
|
+
return UNRESOLVABLE
|
100
100
|
if self.extractor is not None:
|
101
|
-
return self.extractor.extract(value) or
|
101
|
+
return self.extractor.extract(value) or UNRESOLVABLE
|
102
102
|
return value
|
103
103
|
|
104
104
|
|
@@ -108,14 +108,11 @@ class BodyRequest(Node):
|
|
108
108
|
|
109
109
|
pointer: str | None = None
|
110
110
|
|
111
|
-
def evaluate(self, output: StepOutput) -> Any:
|
111
|
+
def evaluate(self, output: StepOutput) -> Any | Unresolvable:
|
112
112
|
document = output.case.body
|
113
113
|
if self.pointer is None:
|
114
114
|
return document
|
115
|
-
|
116
|
-
if resolved is UNRESOLVABLE:
|
117
|
-
return None
|
118
|
-
return resolved
|
115
|
+
return resolve_pointer(document, self.pointer[1:])
|
119
116
|
|
120
117
|
|
121
118
|
@dataclass
|
@@ -125,12 +122,12 @@ class HeaderResponse(Node):
|
|
125
122
|
parameter: str
|
126
123
|
extractor: Extractor | None = None
|
127
124
|
|
128
|
-
def evaluate(self, output: StepOutput) -> str:
|
125
|
+
def evaluate(self, output: StepOutput) -> str | Unresolvable:
|
129
126
|
value = output.response.headers.get(self.parameter.lower())
|
130
127
|
if value is None:
|
131
|
-
return
|
128
|
+
return UNRESOLVABLE
|
132
129
|
if self.extractor is not None:
|
133
|
-
return self.extractor.extract(value[0]) or
|
130
|
+
return self.extractor.extract(value[0]) or UNRESOLVABLE
|
134
131
|
return value[0]
|
135
132
|
|
136
133
|
|
@@ -145,7 +142,4 @@ class BodyResponse(Node):
|
|
145
142
|
if self.pointer is None:
|
146
143
|
# We need the parsed document - data will be serialized before sending to the application
|
147
144
|
return document
|
148
|
-
|
149
|
-
if resolved is UNRESOLVABLE:
|
150
|
-
return None
|
151
|
-
return resolved
|
145
|
+
return resolve_pointer(document, self.pointer[1:])
|
@@ -46,7 +46,7 @@ def _parse_variable(tokens: lexer.TokenGenerator, token: lexer.Token, expr: str)
|
|
46
46
|
elif token.value == nodes.NodeType.RESPONSE.value:
|
47
47
|
yield _parse_response(tokens, expr)
|
48
48
|
else:
|
49
|
-
raise UnknownToken(token.value)
|
49
|
+
raise UnknownToken(f"Invalid expression `{expr}`. Unknown token: `{token.value}`")
|
50
50
|
|
51
51
|
|
52
52
|
def _parse_request(tokens: lexer.TokenGenerator, expr: str) -> nodes.BodyRequest | nodes.NonBodyRequest:
|
@@ -376,7 +376,6 @@ def get_parameter_schema(operation: APIOperation, data: dict[str, Any]) -> dict[
|
|
376
376
|
),
|
377
377
|
path=operation.path,
|
378
378
|
method=operation.method,
|
379
|
-
full_path=operation.full_path,
|
380
379
|
)
|
381
380
|
return data["schema"]
|
382
381
|
# https://github.com/OAI/OpenAPI-Specification/blob/master/versions/3.0.3.md#fixed-fields-10
|
@@ -388,7 +387,6 @@ def get_parameter_schema(operation: APIOperation, data: dict[str, Any]) -> dict[
|
|
388
387
|
MISSING_SCHEMA_OR_CONTENT_MESSAGE.format(location=data.get("in", ""), name=data.get("name", "<UNKNOWN>")),
|
389
388
|
path=operation.path,
|
390
389
|
method=operation.method,
|
391
|
-
full_path=operation.full_path,
|
392
390
|
) from exc
|
393
391
|
options = iter(content.values())
|
394
392
|
media_type_object = next(options)
|
@@ -66,9 +66,177 @@ def _handle_parsed_pattern(parsed: list, pattern: str, min_length: int | None, m
|
|
66
66
|
)
|
67
67
|
+ trailing_anchor
|
68
68
|
)
|
69
|
+
elif (
|
70
|
+
len(parsed) > 3
|
71
|
+
and parsed[0][0] == ANCHOR
|
72
|
+
and parsed[-1][0] == ANCHOR
|
73
|
+
and all(op == LITERAL or op in REPEATS for op, _ in parsed[1:-1])
|
74
|
+
):
|
75
|
+
return _handle_anchored_pattern(parsed, pattern, min_length, max_length)
|
69
76
|
return pattern
|
70
77
|
|
71
78
|
|
79
|
+
def _handle_anchored_pattern(parsed: list, pattern: str, min_length: int | None, max_length: int | None) -> str:
|
80
|
+
"""Update regex pattern with multiple quantified patterns to satisfy length constraints."""
|
81
|
+
# Extract anchors
|
82
|
+
leading_anchor_length = _get_anchor_length(parsed[0][1])
|
83
|
+
trailing_anchor_length = _get_anchor_length(parsed[-1][1])
|
84
|
+
leading_anchor = pattern[:leading_anchor_length]
|
85
|
+
trailing_anchor = pattern[-trailing_anchor_length:]
|
86
|
+
|
87
|
+
pattern_parts = parsed[1:-1]
|
88
|
+
|
89
|
+
# Adjust length constraints by subtracting fixed literals length
|
90
|
+
fixed_length = sum(1 for op, _ in pattern_parts if op == LITERAL)
|
91
|
+
if min_length is not None:
|
92
|
+
min_length -= fixed_length
|
93
|
+
if min_length < 0:
|
94
|
+
return pattern
|
95
|
+
if max_length is not None:
|
96
|
+
max_length -= fixed_length
|
97
|
+
if max_length < 0:
|
98
|
+
return pattern
|
99
|
+
|
100
|
+
# Extract only min/max bounds from quantified parts
|
101
|
+
quantifier_bounds = [value[:2] for op, value in pattern_parts if op in REPEATS]
|
102
|
+
|
103
|
+
if not quantifier_bounds:
|
104
|
+
return pattern
|
105
|
+
|
106
|
+
length_distribution = _distribute_length_constraints(quantifier_bounds, min_length, max_length)
|
107
|
+
if not length_distribution:
|
108
|
+
return pattern
|
109
|
+
|
110
|
+
# Rebuild pattern with updated quantifiers
|
111
|
+
result = leading_anchor
|
112
|
+
current_position = leading_anchor_length
|
113
|
+
distribution_idx = 0
|
114
|
+
|
115
|
+
for op, value in pattern_parts:
|
116
|
+
if op == LITERAL:
|
117
|
+
if pattern[current_position] == "\\":
|
118
|
+
# Escaped value
|
119
|
+
current_position += 2
|
120
|
+
result += "\\"
|
121
|
+
else:
|
122
|
+
current_position += 1
|
123
|
+
result += chr(value)
|
124
|
+
else:
|
125
|
+
new_min, new_max = length_distribution[distribution_idx]
|
126
|
+
next_position = _find_quantified_end(pattern, current_position)
|
127
|
+
quantified_segment = pattern[current_position:next_position]
|
128
|
+
_, _, subpattern = value
|
129
|
+
new_value = (new_min, new_max, subpattern)
|
130
|
+
|
131
|
+
result += _update_quantifier(op, new_value, quantified_segment, new_min, new_max)
|
132
|
+
current_position = next_position
|
133
|
+
distribution_idx += 1
|
134
|
+
|
135
|
+
return result + trailing_anchor
|
136
|
+
|
137
|
+
|
138
|
+
def _find_quantified_end(pattern: str, start: int) -> int:
|
139
|
+
"""Find the end position of current quantified part."""
|
140
|
+
char_class_level = 0
|
141
|
+
group_level = 0
|
142
|
+
|
143
|
+
for i in range(start, len(pattern)):
|
144
|
+
char = pattern[i]
|
145
|
+
|
146
|
+
# Handle character class nesting
|
147
|
+
if char == "[":
|
148
|
+
char_class_level += 1
|
149
|
+
elif char == "]":
|
150
|
+
char_class_level -= 1
|
151
|
+
|
152
|
+
# Handle group nesting
|
153
|
+
elif char == "(":
|
154
|
+
group_level += 1
|
155
|
+
elif char == ")":
|
156
|
+
group_level -= 1
|
157
|
+
|
158
|
+
# Only process quantifiers when we're not inside any nested structure
|
159
|
+
elif char_class_level == 0 and group_level == 0:
|
160
|
+
if char in "*+?":
|
161
|
+
return i + 1
|
162
|
+
elif char == "{":
|
163
|
+
# Find matching }
|
164
|
+
while i < len(pattern) and pattern[i] != "}":
|
165
|
+
i += 1
|
166
|
+
return i + 1
|
167
|
+
|
168
|
+
return len(pattern)
|
169
|
+
|
170
|
+
|
171
|
+
def _distribute_length_constraints(
|
172
|
+
bounds: list[tuple[int, int]], min_length: int | None, max_length: int | None
|
173
|
+
) -> list[tuple[int, int]] | None:
|
174
|
+
"""Distribute length constraints among quantified pattern parts."""
|
175
|
+
# Handle exact length case with dynamic programming
|
176
|
+
if min_length == max_length:
|
177
|
+
assert min_length is not None
|
178
|
+
target = min_length
|
179
|
+
dp: dict[tuple[int, int], list[tuple[int, ...]] | None] = {}
|
180
|
+
|
181
|
+
def find_valid_combination(pos: int, remaining: int) -> list[tuple[int, ...]] | None:
|
182
|
+
if (pos, remaining) in dp:
|
183
|
+
return dp[(pos, remaining)]
|
184
|
+
|
185
|
+
if pos == len(bounds):
|
186
|
+
return [()] if remaining == 0 else None
|
187
|
+
|
188
|
+
max_len: int
|
189
|
+
min_len, max_len = bounds[pos]
|
190
|
+
if max_len == MAXREPEAT:
|
191
|
+
max_len = remaining + 1
|
192
|
+
else:
|
193
|
+
max_len += 1
|
194
|
+
|
195
|
+
# Try each possible length for current quantifier
|
196
|
+
for length in range(min_len, max_len):
|
197
|
+
rest = find_valid_combination(pos + 1, remaining - length)
|
198
|
+
if rest is not None:
|
199
|
+
dp[(pos, remaining)] = [(length,) + r for r in rest]
|
200
|
+
return dp[(pos, remaining)]
|
201
|
+
|
202
|
+
dp[(pos, remaining)] = None
|
203
|
+
return None
|
204
|
+
|
205
|
+
distribution = find_valid_combination(0, target)
|
206
|
+
if distribution:
|
207
|
+
return [(length, length) for length in distribution[0]]
|
208
|
+
return None
|
209
|
+
|
210
|
+
# Handle range case by distributing min/max bounds
|
211
|
+
result = []
|
212
|
+
remaining_min = min_length or 0
|
213
|
+
remaining_max = max_length or MAXREPEAT
|
214
|
+
|
215
|
+
for min_repeat, max_repeat in bounds:
|
216
|
+
if remaining_min > 0:
|
217
|
+
part_min = min(max_repeat, max(min_repeat, remaining_min))
|
218
|
+
else:
|
219
|
+
part_min = min_repeat
|
220
|
+
|
221
|
+
if remaining_max < MAXREPEAT:
|
222
|
+
part_max = min(max_repeat, remaining_max)
|
223
|
+
else:
|
224
|
+
part_max = max_repeat
|
225
|
+
|
226
|
+
if part_min > part_max:
|
227
|
+
return None
|
228
|
+
|
229
|
+
result.append((part_min, part_max))
|
230
|
+
|
231
|
+
remaining_min = max(0, remaining_min - part_min)
|
232
|
+
remaining_max -= part_max if part_max != MAXREPEAT else 0
|
233
|
+
|
234
|
+
if remaining_min > 0 or remaining_max < 0:
|
235
|
+
return None
|
236
|
+
|
237
|
+
return result
|
238
|
+
|
239
|
+
|
72
240
|
def _get_anchor_length(node_type: int) -> int:
|
73
241
|
"""Determine the length of the anchor based on its type."""
|
74
242
|
if node_type in {sre.AT_BEGINNING_STRING, sre.AT_END_STRING, sre.AT_BOUNDARY, sre.AT_NON_BOUNDARY}:
|
@@ -93,13 +261,13 @@ def _handle_repeat_quantifier(
|
|
93
261
|
min_length, max_length = _build_size(min_repeat, max_repeat, min_length, max_length)
|
94
262
|
if min_length > max_length:
|
95
263
|
return pattern
|
96
|
-
return f"({_strip_quantifier(pattern)})" + _build_quantifier(min_length, max_length)
|
264
|
+
return f"({_strip_quantifier(pattern).strip(')(')})" + _build_quantifier(min_length, max_length)
|
97
265
|
|
98
266
|
|
99
267
|
def _handle_literal_or_in_quantifier(pattern: str, min_length: int | None, max_length: int | None) -> str:
|
100
268
|
"""Handle literal or character class quantifiers."""
|
101
269
|
min_length = 1 if min_length is None else max(min_length, 1)
|
102
|
-
return f"({pattern})" + _build_quantifier(min_length, max_length)
|
270
|
+
return f"({pattern.strip(')(')})" + _build_quantifier(min_length, max_length)
|
103
271
|
|
104
272
|
|
105
273
|
def _build_quantifier(minimum: int | None, maximum: int | None) -> str:
|