schemathesis 4.0.0a2__py3-none-any.whl → 4.0.0a3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- schemathesis/cli/__init__.py +12 -1
- schemathesis/cli/commands/run/events.py +22 -2
- schemathesis/cli/commands/run/executor.py +3 -0
- schemathesis/cli/commands/run/handlers/output.py +108 -71
- schemathesis/core/errors.py +8 -0
- schemathesis/engine/core.py +1 -1
- schemathesis/engine/errors.py +11 -5
- schemathesis/engine/phases/stateful/__init__.py +1 -0
- schemathesis/engine/phases/stateful/_executor.py +8 -11
- schemathesis/engine/recorder.py +22 -21
- schemathesis/errors.py +19 -13
- schemathesis/generation/coverage.py +4 -4
- schemathesis/generation/hypothesis/builder.py +15 -12
- schemathesis/generation/stateful/state_machine.py +8 -9
- schemathesis/specs/openapi/checks.py +50 -27
- schemathesis/specs/openapi/links.py +18 -4
- schemathesis/specs/openapi/patterns.py +170 -2
- schemathesis/specs/openapi/schemas.py +54 -26
- schemathesis/specs/openapi/stateful/__init__.py +124 -74
- schemathesis/specs/openapi/stateful/control.py +87 -0
- {schemathesis-4.0.0a2.dist-info → schemathesis-4.0.0a3.dist-info}/METADATA +1 -1
- {schemathesis-4.0.0a2.dist-info → schemathesis-4.0.0a3.dist-info}/RECORD +25 -24
- {schemathesis-4.0.0a2.dist-info → schemathesis-4.0.0a3.dist-info}/WHEEL +0 -0
- {schemathesis-4.0.0a2.dist-info → schemathesis-4.0.0a3.dist-info}/entry_points.txt +0 -0
- {schemathesis-4.0.0a2.dist-info → schemathesis-4.0.0a3.dist-info}/licenses/LICENSE +0 -0
@@ -78,7 +78,6 @@ def execute_state_machine_loop(
|
|
78
78
|
self._start_time = time.monotonic()
|
79
79
|
self._scenario_id = scenario_started.id
|
80
80
|
event_queue.put(scenario_started)
|
81
|
-
self.recorder = ScenarioRecorder(label="Stateful tests")
|
82
81
|
self._check_ctx = engine.get_check_context(self.recorder)
|
83
82
|
|
84
83
|
def get_call_kwargs(self, case: Case) -> dict[str, Any]:
|
@@ -100,12 +99,6 @@ def execute_state_machine_loop(
|
|
100
99
|
def step(self, input: StepInput) -> StepOutput | None:
|
101
100
|
# Checking the stop event once inside `step` is sufficient as it is called frequently
|
102
101
|
# The idea is to stop the execution as soon as possible
|
103
|
-
if input.transition is not None:
|
104
|
-
self.recorder.record_case(
|
105
|
-
parent_id=input.transition.parent_id, transition=input.transition, case=input.case
|
106
|
-
)
|
107
|
-
else:
|
108
|
-
self.recorder.record_case(parent_id=None, transition=None, case=input.case)
|
109
102
|
if engine.has_to_stop:
|
110
103
|
raise KeyboardInterrupt
|
111
104
|
try:
|
@@ -176,10 +169,7 @@ def execute_state_machine_loop(
|
|
176
169
|
ctx.reset_scenario()
|
177
170
|
super().teardown()
|
178
171
|
|
179
|
-
|
180
|
-
InstrumentedStateMachine = hypothesis.seed(config.execution.seed)(_InstrumentedStateMachine)
|
181
|
-
else:
|
182
|
-
InstrumentedStateMachine = _InstrumentedStateMachine
|
172
|
+
seed = config.execution.seed
|
183
173
|
|
184
174
|
while True:
|
185
175
|
# This loop is running until no new failures are found in a single iteration
|
@@ -197,6 +187,13 @@ def execute_state_machine_loop(
|
|
197
187
|
)
|
198
188
|
break
|
199
189
|
suite_status = Status.SUCCESS
|
190
|
+
if seed is not None:
|
191
|
+
InstrumentedStateMachine = hypothesis.seed(seed)(_InstrumentedStateMachine)
|
192
|
+
# Predictably change the seed to avoid re-running the same sequences if tests fail
|
193
|
+
# yet have reproducible results
|
194
|
+
seed += 1
|
195
|
+
else:
|
196
|
+
InstrumentedStateMachine = _InstrumentedStateMachine
|
200
197
|
try:
|
201
198
|
with catch_warnings(), ignore_hypothesis_output(): # type: ignore
|
202
199
|
InstrumentedStateMachine.run(settings=config.execution.hypothesis_settings)
|
schemathesis/engine/recorder.py
CHANGED
@@ -2,7 +2,6 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
import base64
|
4
4
|
import time
|
5
|
-
import uuid
|
6
5
|
from dataclasses import dataclass
|
7
6
|
from typing import TYPE_CHECKING, Iterator, cast
|
8
7
|
|
@@ -24,7 +23,6 @@ class ScenarioRecorder:
|
|
24
23
|
Records test cases, their hierarchy, API interactions, and results of checks performed during execution.
|
25
24
|
"""
|
26
25
|
|
27
|
-
id: uuid.UUID
|
28
26
|
# Human-readable label
|
29
27
|
label: str
|
30
28
|
|
@@ -35,10 +33,9 @@ class ScenarioRecorder:
|
|
35
33
|
# Network interactions by test case ID
|
36
34
|
interactions: dict[str, Interaction]
|
37
35
|
|
38
|
-
__slots__ = ("
|
36
|
+
__slots__ = ("label", "status", "roots", "cases", "checks", "interactions")
|
39
37
|
|
40
38
|
def __init__(self, *, label: str) -> None:
|
41
|
-
self.id = uuid.uuid4()
|
42
39
|
self.label = label
|
43
40
|
self.cases = {}
|
44
41
|
self.checks = {}
|
@@ -96,30 +93,34 @@ class ScenarioRecorder:
|
|
96
93
|
return None
|
97
94
|
|
98
95
|
def find_related(self, *, case_id: str) -> Iterator[Case]:
    """Iterate over the cases that share a tree with ``case_id``, starting from the root.

    The root is located by following ``parent_id`` links upwards from ``case_id``.
    Values are then yielded in depth-first pre-order, with siblings in the
    insertion order of ``self.cases``.

    The case identified by ``case_id`` is never yielded. When it is not the root,
    the nodes below it are not visited either.
    """
    # First, find the root by going up. The walk stops on a node without a parent
    # or on an ID that is not recorded at all.
    root_id = case_id
    while True:
        node = self.cases.get(root_id)
        if node is None or node.parent_id is None:
            break
        root_id = node.parent_id

    # Index children once, so the traversal does not rescan every case per node
    children: dict[str, list[str]] = {}
    for child_id, node in self.cases.items():
        if node.parent_id is not None:
            children.setdefault(node.parent_id, []).append(child_id)

    seen = {case_id}

    root_node = self.cases.get(root_id)
    if root_node and root_id not in seen:
        seen.add(root_id)
        yield root_node.value

    # An explicit stack instead of recursion: deep chains can't hit the recursion limit.
    # Children are pushed in reverse so they are popped in their original order.
    pending = list(reversed(children.get(root_id, ())))
    while pending:
        current_id = pending.pop()
        if current_id in seen:
            continue
        seen.add(current_id)
        yield self.cases[current_id].value
        pending.extend(reversed(children.get(current_id, ())))
|
123
124
|
|
124
125
|
def find_response(self, *, case_id: str) -> Response | None:
|
125
126
|
"""Retrieve the API response for a given test case, if available."""
|
schemathesis/errors.py
CHANGED
@@ -1,29 +1,35 @@
|
|
1
1
|
"""Public Schemathesis errors."""
|
2
2
|
|
3
|
-
from schemathesis.core.errors import
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
3
|
+
from schemathesis.core.errors import (
|
4
|
+
IncorrectUsage,
|
5
|
+
InternalError,
|
6
|
+
InvalidHeadersExample,
|
7
|
+
InvalidLinkDefinition,
|
8
|
+
InvalidRateLimit,
|
9
|
+
InvalidRegexPattern,
|
10
|
+
InvalidRegexType,
|
11
|
+
InvalidSchema,
|
12
|
+
LoaderError,
|
13
|
+
NoLinksFound,
|
14
|
+
OperationNotFound,
|
15
|
+
SchemathesisError,
|
16
|
+
SerializationError,
|
17
|
+
SerializationNotPossible,
|
18
|
+
UnboundPrefix,
|
19
|
+
)
|
16
20
|
|
17
21
|
__all__ = [
|
18
22
|
"IncorrectUsage",
|
19
23
|
"InternalError",
|
20
24
|
"InvalidHeadersExample",
|
25
|
+
"InvalidLinkDefinition",
|
21
26
|
"InvalidRateLimit",
|
22
27
|
"InvalidRegexPattern",
|
23
28
|
"InvalidRegexType",
|
24
29
|
"InvalidSchema",
|
25
30
|
"LoaderError",
|
26
31
|
"OperationNotFound",
|
32
|
+
"NoLinksFound",
|
27
33
|
"SchemathesisError",
|
28
34
|
"SerializationError",
|
29
35
|
"SerializationNotPossible",
|
@@ -194,6 +194,10 @@ class CoverageContext:
|
|
194
194
|
re.compile(pattern)
|
195
195
|
except re.error:
|
196
196
|
raise Unsatisfiable from None
|
197
|
+
if "minLength" in schema or "maxLength" in schema:
|
198
|
+
min_length = schema.get("minLength")
|
199
|
+
max_length = schema.get("maxLength")
|
200
|
+
pattern = update_quantifier(pattern, min_length, max_length)
|
197
201
|
return cached_draw(st.from_regex(pattern))
|
198
202
|
if (keys == ["items", "type"] or keys == ["items", "minItems", "type"]) and isinstance(schema["items"], dict):
|
199
203
|
items = schema["items"]
|
@@ -514,11 +518,7 @@ def _positive_string(ctx: CoverageContext, schema: dict) -> Generator[GeneratedV
|
|
514
518
|
# Default positive value
|
515
519
|
yield PositiveValue(ctx.generate_from_schema(schema), description="Valid string")
|
516
520
|
elif "pattern" in schema:
|
517
|
-
# Without merging `maxLength` & `minLength` into a regex it is problematic
|
518
|
-
# to generate a valid value as the underlying machinery will resort to filtering
|
519
|
-
# and it is unlikely that it will generate a string of that length
|
520
521
|
yield PositiveValue(ctx.generate_from_schema(schema), description="Valid string")
|
521
|
-
return
|
522
522
|
|
523
523
|
seen = set()
|
524
524
|
|
@@ -386,19 +386,22 @@ def _iter_coverage_cases(
|
|
386
386
|
container = template["query"]
|
387
387
|
for parameter in operation.query:
|
388
388
|
instant = Instant()
|
389
|
-
value
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
389
|
+
# Could be absent if value schema can't be negated
|
390
|
+
# I.e. contains just `default` value without any other keywords
|
391
|
+
value = container.get(parameter.name, NOT_SET)
|
392
|
+
if value is not NOT_SET:
|
393
|
+
yield operation.Case(
|
394
|
+
**{**template, "query": {**container, parameter.name: [value, value]}},
|
395
|
+
meta=CaseMetadata(
|
396
|
+
generation=GenerationInfo(time=instant.elapsed, mode=GenerationMode.NEGATIVE),
|
397
|
+
components={},
|
398
|
+
phase=PhaseInfo.coverage(
|
399
|
+
description=f"Duplicate `{parameter.name}` query parameter",
|
400
|
+
parameter=parameter.name,
|
401
|
+
parameter_location="query",
|
402
|
+
),
|
399
403
|
),
|
400
|
-
)
|
401
|
-
)
|
404
|
+
)
|
402
405
|
# Generate missing required parameters
|
403
406
|
for parameter in operation.iter_parameters():
|
404
407
|
if parameter.is_required and parameter.location != "path":
|
@@ -10,7 +10,7 @@ from hypothesis.errors import InvalidDefinition
|
|
10
10
|
from hypothesis.stateful import RuleBasedStateMachine
|
11
11
|
|
12
12
|
from schemathesis.checks import CheckFunction
|
13
|
-
from schemathesis.core.errors import
|
13
|
+
from schemathesis.core.errors import NoLinksFound
|
14
14
|
from schemathesis.core.result import Result
|
15
15
|
from schemathesis.core.transport import Response
|
16
16
|
from schemathesis.generation.case import Case
|
@@ -22,16 +22,11 @@ if TYPE_CHECKING:
|
|
22
22
|
from schemathesis.schemas import BaseSchema
|
23
23
|
|
24
24
|
|
25
|
-
|
26
|
-
"Stateful testing requires at least one OpenAPI link in the schema, but no links detected. "
|
27
|
-
"Please add OpenAPI links to enable stateful testing or use stateless tests instead. \n"
|
28
|
-
"See https://schemathesis.readthedocs.io/en/stable/stateful.html#how-to-specify-connections for more information."
|
29
|
-
)
|
30
|
-
|
25
|
+
DEFAULT_STATEFUL_STEP_COUNT = 6
|
31
26
|
DEFAULT_STATE_MACHINE_SETTINGS = hypothesis.settings(
|
32
27
|
phases=[hypothesis.Phase.generate],
|
33
28
|
deadline=None,
|
34
|
-
stateful_step_count=
|
29
|
+
stateful_step_count=DEFAULT_STATEFUL_STEP_COUNT,
|
35
30
|
suppress_health_check=list(hypothesis.HealthCheck),
|
36
31
|
)
|
37
32
|
|
@@ -104,7 +99,11 @@ class APIStateMachine(RuleBasedStateMachine):
|
|
104
99
|
super().__init__() # type: ignore
|
105
100
|
except InvalidDefinition as exc:
|
106
101
|
if "defines no rules" in str(exc):
|
107
|
-
|
102
|
+
if not self.schema.statistic.links.total:
|
103
|
+
message = "Schema contains no link definitions required for stateful testing"
|
104
|
+
else:
|
105
|
+
message = "All link definitions required for stateful testing are excluded by filters"
|
106
|
+
raise NoLinksFound(message) from None
|
108
107
|
raise
|
109
108
|
self.setup()
|
110
109
|
|
@@ -376,6 +376,10 @@ def ensure_resource_availability(ctx: CheckContext, response: Response, case: Ca
|
|
376
376
|
if not isinstance(case.operation.schema, BaseOpenAPISchema):
|
377
377
|
return True
|
378
378
|
|
379
|
+
# First, check if this is a 4XX response
|
380
|
+
if not (400 <= response.status_code < 500):
|
381
|
+
return None
|
382
|
+
|
379
383
|
parent = ctx.find_parent(case_id=case.id)
|
380
384
|
if parent is None:
|
381
385
|
return None
|
@@ -383,6 +387,17 @@ def ensure_resource_availability(ctx: CheckContext, response: Response, case: Ca
|
|
383
387
|
if parent_response is None:
|
384
388
|
return None
|
385
389
|
|
390
|
+
if not (
|
391
|
+
parent.operation.method.upper() == "POST"
|
392
|
+
and 200 <= parent_response.status_code < 400
|
393
|
+
and _is_prefix_operation(
|
394
|
+
ResourcePath(parent.path, parent.path_parameters or {}),
|
395
|
+
ResourcePath(case.path, case.path_parameters or {}),
|
396
|
+
)
|
397
|
+
):
|
398
|
+
return None
|
399
|
+
|
400
|
+
# Check if all parameters come from links
|
386
401
|
overrides = case._override
|
387
402
|
overrides_all_parameters = True
|
388
403
|
for parameter in case.operation.iter_parameters():
|
@@ -390,34 +405,42 @@ def ensure_resource_availability(ctx: CheckContext, response: Response, case: Ca
|
|
390
405
|
if parameter.name not in getattr(overrides, container, {}):
|
391
406
|
overrides_all_parameters = False
|
392
407
|
break
|
408
|
+
if not overrides_all_parameters:
|
409
|
+
return None
|
393
410
|
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
)
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
411
|
+
# Look for any successful DELETE operations on this resource
|
412
|
+
for related_case in ctx.find_related(case_id=case.id):
|
413
|
+
related_response = ctx.find_response(case_id=related_case.id)
|
414
|
+
if (
|
415
|
+
related_case.operation.method.upper() == "DELETE"
|
416
|
+
and related_response is not None
|
417
|
+
and 200 <= related_response.status_code < 300
|
418
|
+
and _is_prefix_operation(
|
419
|
+
ResourcePath(related_case.path, related_case.path_parameters or {}),
|
420
|
+
ResourcePath(case.path, case.path_parameters or {}),
|
421
|
+
)
|
422
|
+
):
|
423
|
+
# Resource was properly deleted, 404 is expected
|
424
|
+
return None
|
425
|
+
|
426
|
+
# If we got here:
|
427
|
+
# 1. Resource was created successfully
|
428
|
+
# 2. Current operation returned 4XX
|
429
|
+
# 3. All parameters come from links
|
430
|
+
# 4. No successful DELETE operations found
|
431
|
+
created_with = parent.operation.label
|
432
|
+
not_available_with = case.operation.label
|
433
|
+
reason = http.client.responses.get(response.status_code, "Unknown")
|
434
|
+
raise EnsureResourceAvailability(
|
435
|
+
operation=created_with,
|
436
|
+
message=(
|
437
|
+
f"The API returned `{response.status_code} {reason}` for a resource that was just created.\n\n"
|
438
|
+
f"Created with : `{created_with}`\n"
|
439
|
+
f"Not available with: `{not_available_with}`"
|
440
|
+
),
|
441
|
+
created_with=created_with,
|
442
|
+
not_available_with=not_available_with,
|
443
|
+
)
|
421
444
|
|
422
445
|
|
423
446
|
class AuthKind(enum.Enum):
|
@@ -2,9 +2,10 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
from dataclasses import dataclass
|
4
4
|
from functools import lru_cache
|
5
|
-
from typing import TYPE_CHECKING, Any, Generator, Literal, Union, cast
|
5
|
+
from typing import TYPE_CHECKING, Any, Callable, Generator, Literal, Union, cast
|
6
6
|
|
7
7
|
from schemathesis.core import NOT_SET, NotSet
|
8
|
+
from schemathesis.core.errors import InvalidLinkDefinition, InvalidSchema, OperationNotFound
|
8
9
|
from schemathesis.core.result import Err, Ok, Result
|
9
10
|
from schemathesis.generation.stateful.state_machine import ExtractedParam, StepOutput, Transition
|
10
11
|
from schemathesis.schemas import APIOperation
|
@@ -48,14 +49,27 @@ class OpenApiLink:
|
|
48
49
|
__slots__ = ("name", "status_code", "source", "target", "parameters", "body", "merge_body", "_cached_extract")
|
49
50
|
|
50
51
|
def __init__(self, name: str, status_code: str, definition: dict[str, Any], source: APIOperation):
|
52
|
+
from schemathesis.specs.openapi.schemas import BaseOpenAPISchema
|
53
|
+
|
51
54
|
self.name = name
|
52
55
|
self.status_code = status_code
|
53
56
|
self.source = source
|
57
|
+
assert isinstance(source.schema, BaseOpenAPISchema)
|
54
58
|
|
59
|
+
get_operation: Callable[[str], APIOperation]
|
55
60
|
if "operationId" in definition:
|
56
|
-
|
61
|
+
operation_reference = definition["operationId"]
|
62
|
+
get_operation = source.schema.get_operation_by_id
|
57
63
|
else:
|
58
|
-
|
64
|
+
operation_reference = definition["operationRef"]
|
65
|
+
get_operation = source.schema.get_operation_by_reference
|
66
|
+
|
67
|
+
try:
|
68
|
+
self.target = get_operation(operation_reference)
|
69
|
+
except OperationNotFound as exc:
|
70
|
+
raise InvalidLinkDefinition(
|
71
|
+
f"Link '{name}' references non-existent operation '{operation_reference}' from {status_code} response of '{source.label}'"
|
72
|
+
) from exc
|
59
73
|
|
60
74
|
extension = definition.get(SCHEMATHESIS_LINK_EXTENSION)
|
61
75
|
self.parameters = self._normalize_parameters(definition.get("parameters", {}))
|
@@ -92,7 +106,7 @@ class OpenApiLink:
|
|
92
106
|
for param in self.target.iter_parameters():
|
93
107
|
if param.name == name:
|
94
108
|
return LOCATION_TO_CONTAINER[param.location]
|
95
|
-
raise
|
109
|
+
raise InvalidSchema(f"Parameter `{name}` is not defined in API operation `{self.target.label}`")
|
96
110
|
|
97
111
|
def extract(self, output: StepOutput) -> Transition:
|
98
112
|
return self._cached_extract(StepOutputWrapper(output))
|
@@ -66,9 +66,177 @@ def _handle_parsed_pattern(parsed: list, pattern: str, min_length: int | None, m
|
|
66
66
|
)
|
67
67
|
+ trailing_anchor
|
68
68
|
)
|
69
|
+
elif (
|
70
|
+
len(parsed) > 3
|
71
|
+
and parsed[0][0] == ANCHOR
|
72
|
+
and parsed[-1][0] == ANCHOR
|
73
|
+
and all(op == LITERAL or op in REPEATS for op, _ in parsed[1:-1])
|
74
|
+
):
|
75
|
+
return _handle_anchored_pattern(parsed, pattern, min_length, max_length)
|
69
76
|
return pattern
|
70
77
|
|
71
78
|
|
79
|
+
def _handle_anchored_pattern(parsed: list, pattern: str, min_length: int | None, max_length: int | None) -> str:
    """Rewrite the quantifiers of an anchored pattern so that it fits the length limits.

    ``parsed`` is expected to start and end with an anchor node, with only literal
    and repeat nodes in between. Each literal consumes one character of the length
    budget; what is left is split between the repeat nodes.

    The original ``pattern`` is returned unchanged when the literals alone exceed
    a limit, when there are no repeat nodes, or when no distribution is found.
    """
    head_size = _get_anchor_length(parsed[0][1])
    tail_size = _get_anchor_length(parsed[-1][1])
    head = pattern[:head_size]
    tail = pattern[-tail_size:]

    inner_nodes = parsed[1:-1]

    # Every literal node takes exactly one character out of the length budget
    literal_count = len([op for op, _ in inner_nodes if op == LITERAL])
    if min_length is not None:
        min_length -= literal_count
        if min_length < 0:
            return pattern
    if max_length is not None:
        max_length -= literal_count
        if max_length < 0:
            return pattern

    # Only the (min, max) pair of each repeat node matters for distribution
    repeat_bounds = [value[:2] for op, value in inner_nodes if op in REPEATS]
    if not repeat_bounds:
        return pattern

    distribution = _distribute_length_constraints(repeat_bounds, min_length, max_length)
    if not distribution:
        return pattern

    # Walk the nodes and the pattern text in lockstep, collecting rebuilt pieces
    pieces = [head]
    cursor = head_size
    new_bounds = iter(distribution)
    for op, value in inner_nodes:
        if op != LITERAL:
            new_min, new_max = next(new_bounds)
            segment_end = _find_quantified_end(pattern, cursor)
            segment = pattern[cursor:segment_end]
            _, _, subpattern = value
            pieces.append(_update_quantifier(op, (new_min, new_max, subpattern), segment, new_min, new_max))
            cursor = segment_end
            continue
        if pattern[cursor] == "\\":
            # Escaped value: keep the backslash and step over both characters
            cursor += 2
            pieces.append("\\")
        else:
            cursor += 1
        pieces.append(chr(value))

    pieces.append(tail)
    return "".join(pieces)
|
136
|
+
|
137
|
+
|
138
|
+
def _find_quantified_end(pattern: str, start: int) -> int:
|
139
|
+
"""Find the end position of current quantified part."""
|
140
|
+
char_class_level = 0
|
141
|
+
group_level = 0
|
142
|
+
|
143
|
+
for i in range(start, len(pattern)):
|
144
|
+
char = pattern[i]
|
145
|
+
|
146
|
+
# Handle character class nesting
|
147
|
+
if char == "[":
|
148
|
+
char_class_level += 1
|
149
|
+
elif char == "]":
|
150
|
+
char_class_level -= 1
|
151
|
+
|
152
|
+
# Handle group nesting
|
153
|
+
elif char == "(":
|
154
|
+
group_level += 1
|
155
|
+
elif char == ")":
|
156
|
+
group_level -= 1
|
157
|
+
|
158
|
+
# Only process quantifiers when we're not inside any nested structure
|
159
|
+
elif char_class_level == 0 and group_level == 0:
|
160
|
+
if char in "*+?":
|
161
|
+
return i + 1
|
162
|
+
elif char == "{":
|
163
|
+
# Find matching }
|
164
|
+
while i < len(pattern) and pattern[i] != "}":
|
165
|
+
i += 1
|
166
|
+
return i + 1
|
167
|
+
|
168
|
+
return len(pattern)
|
169
|
+
|
170
|
+
|
171
|
+
def _distribute_length_constraints(
|
172
|
+
bounds: list[tuple[int, int]], min_length: int | None, max_length: int | None
|
173
|
+
) -> list[tuple[int, int]] | None:
|
174
|
+
"""Distribute length constraints among quantified pattern parts."""
|
175
|
+
# Handle exact length case with dynamic programming
|
176
|
+
if min_length == max_length:
|
177
|
+
assert min_length is not None
|
178
|
+
target = min_length
|
179
|
+
dp: dict[tuple[int, int], list[tuple[int, ...]] | None] = {}
|
180
|
+
|
181
|
+
def find_valid_combination(pos: int, remaining: int) -> list[tuple[int, ...]] | None:
|
182
|
+
if (pos, remaining) in dp:
|
183
|
+
return dp[(pos, remaining)]
|
184
|
+
|
185
|
+
if pos == len(bounds):
|
186
|
+
return [()] if remaining == 0 else None
|
187
|
+
|
188
|
+
max_len: int
|
189
|
+
min_len, max_len = bounds[pos]
|
190
|
+
if max_len == MAXREPEAT:
|
191
|
+
max_len = remaining + 1
|
192
|
+
else:
|
193
|
+
max_len += 1
|
194
|
+
|
195
|
+
# Try each possible length for current quantifier
|
196
|
+
for length in range(min_len, max_len):
|
197
|
+
rest = find_valid_combination(pos + 1, remaining - length)
|
198
|
+
if rest is not None:
|
199
|
+
dp[(pos, remaining)] = [(length,) + r for r in rest]
|
200
|
+
return dp[(pos, remaining)]
|
201
|
+
|
202
|
+
dp[(pos, remaining)] = None
|
203
|
+
return None
|
204
|
+
|
205
|
+
distribution = find_valid_combination(0, target)
|
206
|
+
if distribution:
|
207
|
+
return [(length, length) for length in distribution[0]]
|
208
|
+
return None
|
209
|
+
|
210
|
+
# Handle range case by distributing min/max bounds
|
211
|
+
result = []
|
212
|
+
remaining_min = min_length or 0
|
213
|
+
remaining_max = max_length or MAXREPEAT
|
214
|
+
|
215
|
+
for min_repeat, max_repeat in bounds:
|
216
|
+
if remaining_min > 0:
|
217
|
+
part_min = min(max_repeat, max(min_repeat, remaining_min))
|
218
|
+
else:
|
219
|
+
part_min = min_repeat
|
220
|
+
|
221
|
+
if remaining_max < MAXREPEAT:
|
222
|
+
part_max = min(max_repeat, remaining_max)
|
223
|
+
else:
|
224
|
+
part_max = max_repeat
|
225
|
+
|
226
|
+
if part_min > part_max:
|
227
|
+
return None
|
228
|
+
|
229
|
+
result.append((part_min, part_max))
|
230
|
+
|
231
|
+
remaining_min = max(0, remaining_min - part_min)
|
232
|
+
remaining_max -= part_max if part_max != MAXREPEAT else 0
|
233
|
+
|
234
|
+
if remaining_min > 0 or remaining_max < 0:
|
235
|
+
return None
|
236
|
+
|
237
|
+
return result
|
238
|
+
|
239
|
+
|
72
240
|
def _get_anchor_length(node_type: int) -> int:
|
73
241
|
"""Determine the length of the anchor based on its type."""
|
74
242
|
if node_type in {sre.AT_BEGINNING_STRING, sre.AT_END_STRING, sre.AT_BOUNDARY, sre.AT_NON_BOUNDARY}:
|
@@ -93,13 +261,13 @@ def _handle_repeat_quantifier(
|
|
93
261
|
min_length, max_length = _build_size(min_repeat, max_repeat, min_length, max_length)
|
94
262
|
if min_length > max_length:
|
95
263
|
return pattern
|
96
|
-
return f"({_strip_quantifier(pattern)})" + _build_quantifier(min_length, max_length)
|
264
|
+
return f"({_strip_quantifier(pattern).strip(')(')})" + _build_quantifier(min_length, max_length)
|
97
265
|
|
98
266
|
|
99
267
|
def _handle_literal_or_in_quantifier(pattern: str, min_length: int | None, max_length: int | None) -> str:
    """Handle literal or character class quantifiers.

    Wraps ``pattern`` in a group and appends a quantifier built from the length
    limits; the minimum is raised to at least 1. Parentheses that already wrap the
    whole pattern are dropped first so that the group is not doubled.
    """
    min_length = 1 if min_length is None else max(min_length, 1)
    return f"({_unwrap_parentheses(pattern)})" + _build_quantifier(min_length, max_length)


def _unwrap_parentheses(pattern: str) -> str:
    """Drop outer parentheses for as long as one balanced pair wraps the whole pattern."""
    while _is_fully_wrapped(pattern):
        pattern = pattern[1:-1]
    return pattern


def _is_fully_wrapped(pattern: str) -> bool:
    """Check that the opening parenthesis at index 0 is closed by the very last character."""
    if len(pattern) < 2 or pattern[0] != "(":
        return False
    last = len(pattern) - 1
    depth = 0
    in_char_class = False
    i = 0
    while i <= last:
        char = pattern[i]
        if char == "\\":
            # Escaped parentheses are literals and must not be counted (or stripped)
            i += 2
            continue
        if in_char_class:
            in_char_class = char != "]"
        elif char == "[":
            in_char_class = True
        elif char == "(":
            depth += 1
        elif char == ")":
            depth -= 1
            if depth == 0:
                # The first group closes here; it wraps everything only at the end
                return i == last
        i += 1
    return False
|
103
271
|
|
104
272
|
|
105
273
|
def _build_quantifier(minimum: int | None, maximum: int | None) -> str:
|