StringGenerator 0.5.0__tar.gz → 0.5.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {stringgenerator-0.5.0 → stringgenerator-0.5.1}/PKG-INFO +1 -1
- {stringgenerator-0.5.0 → stringgenerator-0.5.1}/StringGenerator.egg-info/PKG-INFO +1 -1
- {stringgenerator-0.5.0 → stringgenerator-0.5.1}/strgen/__init__.py +48 -4
- {stringgenerator-0.5.0 → stringgenerator-0.5.1}/strgen/tests.py +16 -0
- {stringgenerator-0.5.0 → stringgenerator-0.5.1}/LICENSE +0 -0
- {stringgenerator-0.5.0 → stringgenerator-0.5.1}/README.rst +0 -0
- {stringgenerator-0.5.0 → stringgenerator-0.5.1}/StringGenerator.egg-info/SOURCES.txt +0 -0
- {stringgenerator-0.5.0 → stringgenerator-0.5.1}/StringGenerator.egg-info/dependency_links.txt +0 -0
- {stringgenerator-0.5.0 → stringgenerator-0.5.1}/StringGenerator.egg-info/top_level.txt +0 -0
- {stringgenerator-0.5.0 → stringgenerator-0.5.1}/pyproject.toml +0 -0
- {stringgenerator-0.5.0 → stringgenerator-0.5.1}/setup.cfg +0 -0
- {stringgenerator-0.5.0 → stringgenerator-0.5.1}/setup.py +0 -0
- {stringgenerator-0.5.0 → stringgenerator-0.5.1}/strgen/countries.py +0 -0
|
@@ -42,7 +42,7 @@ from abc import ABC, abstractmethod
|
|
|
42
42
|
from collections import Counter, namedtuple
|
|
43
43
|
from math import factorial
|
|
44
44
|
|
|
45
|
-
__version__ = "0.5.0"
|
|
45
|
+
__version__ = "0.5.1"
|
|
46
46
|
__author__ = "Paul Wolf"
|
|
47
47
|
__license__ = "BSD"
|
|
48
48
|
|
|
@@ -306,9 +306,29 @@ class StringGenerator:
|
|
|
306
306
|
return "".join(char_list)
|
|
307
307
|
|
|
308
308
|
def count(self, randomizer, **kwargs):
|
|
309
|
-
"""
|
|
310
|
-
|
|
311
|
-
|
|
309
|
+
"""Number of distinct outcomes of a permutation ('&') of the operands.
|
|
310
|
+
|
|
311
|
+
'&' shuffles together the characters produced by all operands. When
|
|
312
|
+
every operand is fixed -- it has exactly one possible value, i.e.
|
|
313
|
+
``count() == 1`` -- the multiset of characters is known, and the
|
|
314
|
+
answer is just the number of distinct permutations of that multiset.
|
|
315
|
+
|
|
316
|
+
When an operand can vary (e.g. a character set), the set of
|
|
317
|
+
characters being shuffled changes with each random draw, so there is
|
|
318
|
+
no single well-defined count. Rather than return a misleading,
|
|
319
|
+
draw-dependent number (the previous behaviour) we raise
|
|
320
|
+
NotImplementedError. See ``StringGenerator.count`` for the full set
|
|
321
|
+
of assumptions behind counting.
|
|
322
|
+
"""
|
|
323
|
+
operand_counts = [node.count(randomizer, **kwargs) for node in self.seq]
|
|
324
|
+
if all(c == 1 for c in operand_counts):
|
|
325
|
+
# every operand is fixed, so the multiset of characters is known
|
|
326
|
+
chars = "".join(node.render(randomizer, **kwargs) for node in self.seq)
|
|
327
|
+
return permutation_count(chars)
|
|
328
|
+
raise NotImplementedError(
|
|
329
|
+
"count() is undefined for '&' over operands that are not fixed; "
|
|
330
|
+
"the result would depend on the random draw"
|
|
331
|
+
)
|
|
312
332
|
|
|
313
333
|
def dump(self, level=-1):
|
|
314
334
|
print((StringGenerator.mytab * level) + repr(self))
|
|
@@ -706,6 +726,30 @@ class StringGenerator:
|
|
|
706
726
|
return self.seq.render(self.randomizer, **kwargs)
|
|
707
727
|
|
|
708
728
|
def count(self, **kwargs) -> int:
|
|
729
|
+
r"""Return the size of the generation sample space for the template.
|
|
730
|
+
|
|
731
|
+
This is the number of distinct strings the template can produce, but
|
|
732
|
+
only under the following assumptions. Where they do not hold, the value
|
|
733
|
+
is the size of the *generation* space (the number of ways the template
|
|
734
|
+
can be filled in), which may exceed the number of distinct strings:
|
|
735
|
+
|
|
736
|
+
* **Character classes contain no duplicate characters.** ``len(chars)``
|
|
737
|
+
is used as the alphabet size, so a class with repeats (e.g.
|
|
738
|
+
``[a\d\d]``) counts each repeat as a separate option and overcounts.
|
|
739
|
+
The generator also weights repeated characters more heavily when
|
|
740
|
+
rendering, so this number reflects that weighting.
|
|
741
|
+
* **Alternation (``|``) branches are disjoint.** The count sums the
|
|
742
|
+
branch sizes, which equals the number of distinct results only if no
|
|
743
|
+
two branches can produce the same string; overlapping branches
|
|
744
|
+
overcount.
|
|
745
|
+
* **Permutation (``&``) is applied only to fixed operands.** For ``&``
|
|
746
|
+
over operands that can vary, the count depends on the random draw, so
|
|
747
|
+
``count()`` raises NotImplementedError instead of guessing.
|
|
748
|
+
|
|
749
|
+
``count()`` also raises NotImplementedError if the template contains a
|
|
750
|
+
``${...}`` source, since a source may be an arbitrary callable or list
|
|
751
|
+
whose size is unknown.
|
|
752
|
+
"""
|
|
709
753
|
return self.seq.count(self.randomizer, **kwargs)
|
|
710
754
|
|
|
711
755
|
def dump(self, cnt=None, **kwargs):
|
|
@@ -411,6 +411,22 @@ class TestSG(unittest.TestCase):
|
|
|
411
411
|
SG(r"[\u\d]{2}|[abc]{3}", uaf=100).render_list(1323, unique=True)
|
|
412
412
|
)
|
|
413
413
|
|
|
414
|
+
def test_count_and_operator(self):
|
|
415
|
+
"""count() over '&' is deterministic and only defined for fixed operands."""
|
|
416
|
+
# Fixed (literal) operands: distinct permutations of "1abc" = 4! = 24.
|
|
417
|
+
sg = SG(r"1&abc")
|
|
418
|
+
assert sg.count() == 24
|
|
419
|
+
assert sg.count() == len(sg.render_set(24))
|
|
420
|
+
|
|
421
|
+
# Deterministic across repeated calls. Previously this rendered once and
|
|
422
|
+
# counted that single sample, so the value varied with the random draw.
|
|
423
|
+
assert len({SG(r"1&abc").count() for _ in range(20)}) == 1
|
|
424
|
+
|
|
425
|
+
# Operands that can vary have no single well-defined count: raise rather
|
|
426
|
+
# than return a draw-dependent number.
|
|
427
|
+
with self.assertRaises(NotImplementedError):
|
|
428
|
+
SG(r"[\d]{2}&[\d]{1}").count()
|
|
429
|
+
|
|
414
430
|
def test_probabilistic_or(self):
|
|
415
431
|
d = SG("0|1|2|3|4|5|6|7|8|9").render_list(10000)
|
|
416
432
|
d = [int(d) for d in d]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{stringgenerator-0.5.0 → stringgenerator-0.5.1}/StringGenerator.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|