pystylometry 1.0.0__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. pystylometry/README.md +42 -0
  2. pystylometry/__init__.py +45 -3
  3. pystylometry/_types.py +1017 -259
  4. pystylometry/authorship/README.md +21 -0
  5. pystylometry/authorship/__init__.py +28 -4
  6. pystylometry/authorship/additional_methods.py +260 -40
  7. pystylometry/authorship/compression.py +175 -0
  8. pystylometry/authorship/kilgarriff.py +354 -0
  9. pystylometry/character/README.md +17 -0
  10. pystylometry/character/character_metrics.py +267 -179
  11. pystylometry/cli.py +427 -0
  12. pystylometry/consistency/README.md +27 -0
  13. pystylometry/consistency/__init__.py +57 -0
  14. pystylometry/consistency/_thresholds.py +162 -0
  15. pystylometry/consistency/drift.py +549 -0
  16. pystylometry/dialect/README.md +26 -0
  17. pystylometry/dialect/__init__.py +65 -0
  18. pystylometry/dialect/_data/dialect_markers.json +1134 -0
  19. pystylometry/dialect/_loader.py +360 -0
  20. pystylometry/dialect/detector.py +533 -0
  21. pystylometry/lexical/README.md +23 -0
  22. pystylometry/lexical/advanced_diversity.py +61 -22
  23. pystylometry/lexical/function_words.py +255 -56
  24. pystylometry/lexical/hapax.py +182 -52
  25. pystylometry/lexical/mtld.py +108 -26
  26. pystylometry/lexical/ttr.py +76 -10
  27. pystylometry/lexical/word_frequency_sophistication.py +1522 -298
  28. pystylometry/lexical/yule.py +136 -50
  29. pystylometry/ngrams/README.md +18 -0
  30. pystylometry/ngrams/entropy.py +150 -49
  31. pystylometry/ngrams/extended_ngrams.py +314 -69
  32. pystylometry/prosody/README.md +17 -0
  33. pystylometry/prosody/rhythm_prosody.py +773 -11
  34. pystylometry/readability/README.md +23 -0
  35. pystylometry/readability/additional_formulas.py +1887 -762
  36. pystylometry/readability/ari.py +144 -82
  37. pystylometry/readability/coleman_liau.py +136 -109
  38. pystylometry/readability/flesch.py +177 -73
  39. pystylometry/readability/gunning_fog.py +165 -161
  40. pystylometry/readability/smog.py +123 -42
  41. pystylometry/stylistic/README.md +20 -0
  42. pystylometry/stylistic/cohesion_coherence.py +669 -13
  43. pystylometry/stylistic/genre_register.py +1560 -17
  44. pystylometry/stylistic/markers.py +611 -17
  45. pystylometry/stylistic/vocabulary_overlap.py +354 -13
  46. pystylometry/syntactic/README.md +20 -0
  47. pystylometry/syntactic/advanced_syntactic.py +76 -14
  48. pystylometry/syntactic/pos_ratios.py +70 -6
  49. pystylometry/syntactic/sentence_stats.py +55 -12
  50. pystylometry/syntactic/sentence_types.py +71 -15
  51. pystylometry/viz/README.md +27 -0
  52. pystylometry/viz/__init__.py +71 -0
  53. pystylometry/viz/drift.py +589 -0
  54. pystylometry/viz/jsx/__init__.py +31 -0
  55. pystylometry/viz/jsx/_base.py +144 -0
  56. pystylometry/viz/jsx/report.py +677 -0
  57. pystylometry/viz/jsx/timeline.py +716 -0
  58. pystylometry/viz/jsx/viewer.py +1032 -0
  59. pystylometry-1.3.0.dist-info/METADATA +136 -0
  60. pystylometry-1.3.0.dist-info/RECORD +76 -0
  61. {pystylometry-1.0.0.dist-info → pystylometry-1.3.0.dist-info}/WHEEL +1 -1
  62. pystylometry-1.3.0.dist-info/entry_points.txt +4 -0
  63. pystylometry-1.0.0.dist-info/METADATA +0 -275
  64. pystylometry-1.0.0.dist-info/RECORD +0 -46
pystylometry/lexical/hapax.py
@@ -1,16 +1,87 @@
- """Hapax legomena and related vocabulary richness metrics."""
+ """Hapax legomena and related vocabulary richness metrics.
+
+ This module implements hapax metrics with native chunked analysis for
+ stylometric fingerprinting.
+
+ Related GitHub Issue:
+     #27 - Native chunked analysis with Distribution dataclass
+     https://github.com/craigtrim/pystylometry/issues/27
+ """

  import math
  from collections import Counter

- from .._types import HapaxLexiconResult, HapaxResult, LexiconCategories
+ from .._types import (
+     Distribution,
+     HapaxLexiconResult,
+     HapaxResult,
+     LexiconCategories,
+     chunk_text,
+     make_distribution,
+ )
  from .._utils import check_optional_dependency, tokenize


- def compute_hapax_ratios(text: str) -> HapaxResult:
+ def _compute_hapax_single(text: str) -> tuple[int, float, int, float, float, float, dict]:
+     """Compute hapax metrics for a single chunk of text.
+
+     Returns:
+         Tuple of (hapax_count, hapax_ratio, dis_hapax_count, dis_hapax_ratio,
+         sichel_s, honore_r, metadata_dict).
+         Returns nans for ratios on empty input.
+     """
+     tokens = tokenize(text.lower())
+     N = len(tokens)  # noqa: N806
+
+     if N == 0:
+         return (
+             0,
+             float("nan"),
+             0,
+             float("nan"),
+             float("nan"),
+             float("nan"),
+             {"token_count": 0, "vocabulary_size": 0},
+         )
+
+     # Count frequency of each token
+     freq_counter = Counter(tokens)
+     V = len(freq_counter)  # noqa: N806
+
+     # Count hapax legomena (V₁) and dislegomena (V₂)
+     V1 = sum(1 for count in freq_counter.values() if count == 1)  # noqa: N806
+     V2 = sum(1 for count in freq_counter.values() if count == 2)  # noqa: N806
+
+     # Sichel's S: ratio of dislegomena to vocabulary size
+     sichel_s = V2 / V if V > 0 else 0.0
+
+     # Honoré's R: 100 × log(N) / (1 - V₁/V)
+     if V1 == V:
+         honore_r = float("inf")
+     else:
+         honore_r = 100 * math.log(N) / (1 - V1 / V)
+
+     hapax_ratio = V1 / N if N > 0 else 0.0
+     dis_hapax_ratio = V2 / N if N > 0 else 0.0
+
+     return (
+         V1,
+         hapax_ratio,
+         V2,
+         dis_hapax_ratio,
+         sichel_s,
+         honore_r,
+         {"token_count": N, "vocabulary_size": V},
+     )
+
+
+ def compute_hapax_ratios(text: str, chunk_size: int = 1000) -> HapaxResult:
      """
      Compute hapax legomena, hapax dislegomena, and related richness metrics.

+     This function uses native chunked analysis to capture variance and patterns
+     across the text, which is essential for stylometric fingerprinting.
+
      Hapax legomena = words appearing exactly once
      Hapax dislegomena = words appearing exactly twice

@@ -18,6 +89,10 @@ def compute_hapax_ratios(text: str) -> HapaxResult:
      - Sichel's S: V₂ / V (ratio of dislegomena to total vocabulary)
      - Honoré's R: 100 × log(N) / (1 - V₁/V)

+     Related GitHub Issue:
+         #27 - Native chunked analysis with Distribution dataclass
+         https://github.com/craigtrim/pystylometry/issues/27
+
      References:
          Sichel, H. S. (1975). On a distribution law for word frequencies.
          Journal of the American Statistical Association, 70(351a), 542-547.
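As a quick sanity check on the two formulas quoted in the docstring above, here is a small worked example. It is illustrative only; the sentence and variable names below are not taken from the package or its tests.

import math
from collections import Counter

tokens = "the cat sat on the mat and the dog sat".split()
freq = Counter(tokens)                          # the: 3, sat: 2, five words once each

N = len(tokens)                                 # 10 tokens
V = len(freq)                                   # 7 distinct words
V1 = sum(1 for c in freq.values() if c == 1)    # 5 hapax legomena
V2 = sum(1 for c in freq.values() if c == 2)    # 1 dislegomenon ("sat")

sichel_s = V2 / V                               # 1 / 7 ≈ 0.143
honore_r = 100 * math.log(N) / (1 - V1 / V)     # 100 * ln(10) / (2 / 7) ≈ 805.9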
@@ -27,68 +102,123 @@ def compute_hapax_ratios(text: str) -> HapaxResult:

      Args:
          text: Input text to analyze
+         chunk_size: Number of words per chunk (default: 1000)

      Returns:
-         HapaxResult with counts, ratios, Sichel's S, Honoré's R, and metadata
-
-     Note: When all words are unique (V₁ = V), Honoré's R returns float('inf')
-     to indicate maximal vocabulary richness (division by zero case).
+         HapaxResult with counts, ratios, distributions, and metadata

      Example:
-         >>> text = "The quick brown fox jumps over the lazy dog"
-         >>> result = compute_hapax_ratios(text)
-         >>> result.hapax_count  # Words appearing once
-         7
-         >>> result.dis_hapax_count  # Words appearing twice
-         1
-         >>> print(f"Sichel's S: {result.sichel_s:.3f}")
-         Sichel's S: 0.125
+         >>> result = compute_hapax_ratios("Long text here...", chunk_size=1000)
+         >>> result.hapax_ratio  # Mean across chunks
+         0.45
+         >>> result.hapax_ratio_dist.std  # Variance reveals fingerprint
+         0.08
      """
-     tokens = tokenize(text.lower())
-     N = len(tokens)  # noqa: N806
-
-     if N == 0:
+     # Chunk the text
+     chunks = chunk_text(text, chunk_size)
+
+     # Compute metrics per chunk
+     hapax_ratio_values = []
+     dis_hapax_ratio_values = []
+     sichel_s_values = []
+     honore_r_values = []
+     honore_r_inf_count = 0  # Track chunks where all words are unique (V₁ = V)
+     total_hapax_count = 0
+     total_dis_hapax_count = 0
+     total_tokens = 0
+     total_vocab = 0
+     valid_chunk_count = 0
+
+     for chunk in chunks:
+         h_cnt, h_rat, dh_cnt, dh_rat, sichel, honore, meta = _compute_hapax_single(chunk)
+         total_hapax_count += h_cnt
+         total_dis_hapax_count += dh_cnt
+         total_tokens += meta.get("token_count", 0)
+         total_vocab += meta.get("vocabulary_size", 0)
+
+         if not math.isnan(h_rat):
+             hapax_ratio_values.append(h_rat)
+             valid_chunk_count += 1
+         if not math.isnan(dh_rat):
+             dis_hapax_ratio_values.append(dh_rat)
+         if not math.isnan(sichel):
+             sichel_s_values.append(sichel)
+         if math.isinf(honore):
+             # Track infinite values (when V₁ = V, maximal vocabulary richness)
+             honore_r_inf_count += 1
+         elif not math.isnan(honore):
+             honore_r_values.append(honore)
+
+     # Handle empty or all-invalid chunks
+     if not hapax_ratio_values:
+         empty_dist = Distribution(
+             values=[],
+             mean=float("nan"),
+             median=float("nan"),
+             std=0.0,
+             range=0.0,
+             iqr=0.0,
+         )
          return HapaxResult(
              hapax_count=0,
-             hapax_ratio=0.0,
+             hapax_ratio=float("nan"),
              dis_hapax_count=0,
-             dis_hapax_ratio=0.0,
-             sichel_s=0.0,
-             honore_r=0.0,
-             metadata={"token_count": 0, "vocabulary_size": 0},
+             dis_hapax_ratio=float("nan"),
+             sichel_s=float("nan"),
+             honore_r=float("nan"),
+             hapax_ratio_dist=empty_dist,
+             dis_hapax_ratio_dist=empty_dist,
+             sichel_s_dist=empty_dist,
+             honore_r_dist=empty_dist,
+             chunk_size=chunk_size,
+             chunk_count=len(chunks),
+             metadata={"total_token_count": 0, "total_vocabulary_size": 0},
          )

-     # Count frequency of each token
-     freq_counter = Counter(tokens)
-     V = len(freq_counter)  # noqa: N806
-
-     # Count hapax legomena (V₁) and dislegomena (V₂)
-     V1 = sum(1 for count in freq_counter.values() if count == 1)  # noqa: N806
-     V2 = sum(1 for count in freq_counter.values() if count == 2)  # noqa: N806
-
-     # Sichel's S: ratio of dislegomena to vocabulary size
-     # S = V₂ / V
-     sichel_s = V2 / V if V > 0 else 0.0
+     # Build distributions
+     hapax_ratio_dist = make_distribution(hapax_ratio_values)
+     dis_hapax_ratio_dist = make_distribution(dis_hapax_ratio_values)
+     sichel_s_dist = (
+         make_distribution(sichel_s_values)
+         if sichel_s_values
+         else Distribution(
+             values=[], mean=float("nan"), median=float("nan"), std=0.0, range=0.0, iqr=0.0
+         )
+     )

-     # Honoré's R: 100 × log(N) / (1 - V₁/V)
-     # R = 100 × log(N) / (1 - V₁/V)
-     # If V₁ = V (all words appear once), denominator is 0, return infinity
-     # This indicates maximal vocabulary richness (every word unique)
-     if V1 == V:
-         honore_r = float("inf")
+     # Handle honore_r specially: if all valid chunks had V₁ = V (all unique words),
+     # return infinity to indicate maximal vocabulary richness
+     if honore_r_values:
+         honore_r_dist = make_distribution(honore_r_values)
+         honore_r_final = honore_r_dist.mean
+     elif honore_r_inf_count > 0 and honore_r_inf_count == valid_chunk_count:
+         # All valid chunks had infinite honore_r (all words unique)
+         honore_r_dist = Distribution(
+             values=[], mean=float("inf"), median=float("inf"), std=0.0, range=0.0, iqr=0.0
+         )
+         honore_r_final = float("inf")
      else:
-         honore_r = 100 * math.log(N) / (1 - V1 / V)
+         honore_r_dist = Distribution(
+             values=[], mean=float("nan"), median=float("nan"), std=0.0, range=0.0, iqr=0.0
+         )
+         honore_r_final = float("nan")

      return HapaxResult(
-         hapax_count=V1,
-         hapax_ratio=V1 / N if N > 0 else 0.0,
-         dis_hapax_count=V2,
-         dis_hapax_ratio=V2 / N if N > 0 else 0.0,
-         sichel_s=sichel_s,
-         honore_r=honore_r,
+         hapax_count=total_hapax_count,
+         hapax_ratio=hapax_ratio_dist.mean,
+         dis_hapax_count=total_dis_hapax_count,
+         dis_hapax_ratio=dis_hapax_ratio_dist.mean,
+         sichel_s=sichel_s_dist.mean,
+         honore_r=honore_r_final,
+         hapax_ratio_dist=hapax_ratio_dist,
+         dis_hapax_ratio_dist=dis_hapax_ratio_dist,
+         sichel_s_dist=sichel_s_dist,
+         honore_r_dist=honore_r_dist,
+         chunk_size=chunk_size,
+         chunk_count=len(chunks),
          metadata={
-             "token_count": N,
-             "vocabulary_size": V,
+             "total_token_count": total_tokens,
+             "total_vocabulary_size": total_vocab,
          },
      )

@@ -148,8 +278,8 @@ def compute_hapax_with_lexicon_analysis(text: str) -> HapaxLexiconResult:
      check_optional_dependency("bnc_lookup", "lexical")
      check_optional_dependency("wordnet_lookup", "lexical")

-     from bnc_lookup import is_bnc_term  # type: ignore[import-not-found]
-     from wordnet_lookup import is_wordnet_term  # type: ignore[import-not-found]
+     from bnc_lookup import exists as is_bnc_term  # type: ignore[import-untyped]
+     from wordnet_lookup import is_wordnet_term  # type: ignore[import-untyped]

      # First compute standard hapax metrics
      hapax_result = compute_hapax_ratios(text)
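The chunked version above leans on Distribution and make_distribution from pystylometry/_types.py, which this diff does not show. A minimal sketch consistent with the fields used here (values, mean, median, std, range, iqr) might look like the following; the real helpers in _types.py may compute these differently.

import statistics
from dataclasses import dataclass, field

@dataclass
class Distribution:
    # Per-chunk summary statistics (sketch; field set inferred from the diff above)
    values: list[float] = field(default_factory=list)
    mean: float = float("nan")
    median: float = float("nan")
    std: float = 0.0
    range: float = 0.0
    iqr: float = 0.0

def make_distribution(values: list[float]) -> Distribution:
    # Sketch: collapse a list of per-chunk values into summary statistics
    if not values:
        return Distribution()
    quartiles = (
        statistics.quantiles(values, n=4) if len(values) >= 2 else [values[0]] * 3
    )
    return Distribution(
        values=list(values),
        mean=statistics.fmean(values),
        median=statistics.median(values),
        std=statistics.pstdev(values),  # the package may use sample std instead
        range=max(values) - min(values),
        iqr=quartiles[2] - quartiles[0],
    )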
pystylometry/lexical/mtld.py
@@ -1,6 +1,16 @@
- """MTLD (Measure of Textual Lexical Diversity) implementation."""
+ """MTLD (Measure of Textual Lexical Diversity) implementation.

- from .._types import MTLDResult
+ This module implements MTLD with native chunked analysis for stylometric
+ fingerprinting.
+
+ Related GitHub Issue:
+     #27 - Native chunked analysis with Distribution dataclass
+     https://github.com/craigtrim/pystylometry/issues/27
+ """
+
+ import math
+
+ from .._types import Distribution, MTLDResult, chunk_text, make_distribution
  from .._utils import tokenize


@@ -62,13 +72,46 @@ def _calculate_mtld_direction(tokens: list[str], threshold: float, forward: bool
      return float(len(tokens))


+ def _compute_mtld_single(text: str, threshold: float) -> tuple[float, float, float, dict]:
+     """Compute MTLD for a single chunk of text.
+
+     Returns:
+         Tuple of (mtld_forward, mtld_backward, mtld_average, metadata_dict).
+         Returns (nan, nan, nan, metadata) for empty input.
+     """
+     tokens = tokenize(text.lower())
+
+     if len(tokens) == 0:
+         return (
+             float("nan"),
+             float("nan"),
+             float("nan"),
+             {"token_count": 0},
+         )
+
+     mtld_forward = _calculate_mtld_direction(tokens, threshold, forward=True)
+     mtld_backward = _calculate_mtld_direction(tokens, threshold, forward=False)
+     mtld_average = (mtld_forward + mtld_backward) / 2
+
+     return (
+         mtld_forward,
+         mtld_backward,
+         mtld_average,
+         {"token_count": len(tokens)},
+     )
+
+
  def compute_mtld(
      text: str,
      threshold: float = 0.72,
+     chunk_size: int = 1000,
  ) -> MTLDResult:
      """
      Compute MTLD (Measure of Textual Lexical Diversity).

+     This function uses native chunked analysis to capture variance and patterns
+     across the text, which is essential for stylometric fingerprinting.
+
      MTLD measures the mean length of sequential word strings that maintain
      a minimum threshold TTR. It's more robust than simple TTR for texts of
      varying lengths.
@@ -79,6 +122,10 @@ def compute_mtld(
      - Completed factors (segments where TTR dropped below threshold)
      - Partial factor for any remaining incomplete segment (weighted by proximity to threshold)

+     Related GitHub Issue:
+         #27 - Native chunked analysis with Distribution dataclass
+         https://github.com/craigtrim/pystylometry/issues/27
+
      References:
          McCarthy, P. M., & Jarvis, S. (2010). MTLD, vocd-D, and HD-D:
          A validation study of sophisticated approaches to lexical diversity assessment.
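The _calculate_mtld_direction helper referenced above is not included in this diff. A minimal sketch of the standard MTLD factor-count pass it presumably performs (after McCarthy & Jarvis, 2010) is shown below; the function name mtld_one_direction is hypothetical, and the package's actual implementation may differ in edge-case handling.

def mtld_one_direction(tokens: list[str], threshold: float = 0.72) -> float:
    # Sketch of one MTLD pass: total tokens divided by the factor count
    factors = 0.0
    types: set[str] = set()
    seen = 0
    for tok in tokens:
        seen += 1
        types.add(tok)
        ttr = len(types) / seen
        if ttr <= threshold:
            # Completed factor: running TTR dropped to the threshold
            factors += 1
            types.clear()
            seen = 0
    if seen > 0:
        # Partial factor for the leftover segment, weighted by proximity to the threshold
        ttr = len(types) / seen
        factors += (1 - ttr) / (1 - threshold)
    return len(tokens) / factors if factors > 0 else float(len(tokens))

The backward score would be the same pass over reversed(tokens); the reported value averages the two directions, as _compute_mtld_single does above.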
@@ -87,16 +134,20 @@ def compute_mtld(
      Args:
          text: Input text to analyze
          threshold: TTR threshold to maintain (default: 0.72, must be in range (0, 1))
+         chunk_size: Number of words per chunk (default: 1000)

      Returns:
-         MTLDResult with forward, backward, and average MTLD scores
+         MTLDResult with forward, backward, average MTLD scores and distributions

      Raises:
          ValueError: If threshold is not in range (0, 1)

      Example:
-         >>> result = compute_mtld("The quick brown fox jumps over the lazy dog...")
-         >>> print(f"MTLD: {result.mtld_average:.2f}")
+         >>> result = compute_mtld("Long text here...", chunk_size=1000)
+         >>> result.mtld_average  # Mean across chunks
+         72.5
+         >>> result.mtld_average_dist.std  # Variance reveals fingerprint
+         8.3
      """
      # Validate threshold parameter
      if not (0 < threshold < 1):
@@ -105,33 +156,64 @@ def compute_mtld(
              "Common values: 0.72 (default), 0.5-0.8"
          )

-     # Case-insensitive tokenization for consistency with other lexical metrics
-     # (compute_yule, compute_hapax_ratios both use text.lower())
-     tokens = tokenize(text.lower())
-
-     if len(tokens) == 0:
+     # Chunk the text
+     chunks = chunk_text(text, chunk_size)
+
+     # Compute metrics per chunk
+     forward_values = []
+     backward_values = []
+     average_values = []
+     total_tokens = 0
+
+     for chunk in chunks:
+         fwd, bwd, avg, meta = _compute_mtld_single(chunk, threshold)
+         if not math.isnan(fwd):
+             forward_values.append(fwd)
+             backward_values.append(bwd)
+             average_values.append(avg)
+         total_tokens += meta.get("token_count", 0)
+
+     # Handle empty or all-invalid chunks
+     if not forward_values:
+         empty_dist = Distribution(
+             values=[],
+             mean=float("nan"),
+             median=float("nan"),
+             std=0.0,
+             range=0.0,
+             iqr=0.0,
+         )
          return MTLDResult(
-             mtld_forward=0.0,
-             mtld_backward=0.0,
-             mtld_average=0.0,
-             metadata={"token_count": 0, "threshold": threshold},
+             mtld_forward=float("nan"),
+             mtld_backward=float("nan"),
+             mtld_average=float("nan"),
+             mtld_forward_dist=empty_dist,
+             mtld_backward_dist=empty_dist,
+             mtld_average_dist=empty_dist,
+             chunk_size=chunk_size,
+             chunk_count=len(chunks),
+             metadata={
+                 "total_token_count": 0,
+                 "threshold": threshold,
+             },
          )

-     # Calculate MTLD in forward direction
-     mtld_forward = _calculate_mtld_direction(tokens, threshold, forward=True)
-
-     # Calculate MTLD in backward direction
-     mtld_backward = _calculate_mtld_direction(tokens, threshold, forward=False)
-
-     # Average of forward and backward
-     mtld_average = (mtld_forward + mtld_backward) / 2
+     # Build distributions
+     forward_dist = make_distribution(forward_values)
+     backward_dist = make_distribution(backward_values)
+     average_dist = make_distribution(average_values)

      return MTLDResult(
-         mtld_forward=mtld_forward,
-         mtld_backward=mtld_backward,
-         mtld_average=mtld_average,
+         mtld_forward=forward_dist.mean,
+         mtld_backward=backward_dist.mean,
+         mtld_average=average_dist.mean,
+         mtld_forward_dist=forward_dist,
+         mtld_backward_dist=backward_dist,
+         mtld_average_dist=average_dist,
+         chunk_size=chunk_size,
+         chunk_count=len(chunks),
          metadata={
-             "token_count": len(tokens),
+             "total_token_count": total_tokens,
              "threshold": threshold,
          },
      )
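Both hapax.py and mtld.py call chunk_text from pystylometry/_types.py, which the diffs above import but do not show. Given that the docstrings describe chunk_size as the number of words per chunk, a plausible sketch is the following; the actual helper may handle boundaries differently.

def chunk_text(text: str, chunk_size: int = 1000) -> list[str]:
    # Sketch: split on whitespace and regroup into chunks of at most chunk_size words
    words = text.split()
    if not words:
        return []
    return [
        " ".join(words[i : i + chunk_size])
        for i in range(0, len(words), chunk_size)
    ]

Whether a short trailing chunk is kept, merged, or dropped is not visible from this diff; that choice affects chunk_count and the spread of each Distribution.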
pystylometry/lexical/ttr.py
@@ -2,12 +2,18 @@

  This module provides a facade wrapper around the stylometry-ttr package,
  maintaining consistent API patterns with other pystylometry metrics.
+
+ Related GitHub Issue:
+     #27 - Native chunked analysis with Distribution dataclass
+     https://github.com/craigtrim/pystylometry/issues/27
  """

- from .._types import TTRResult
+ from __future__ import annotations
+
+ from .._types import Distribution, TTRResult, make_distribution


- def compute_ttr(text: str, text_id: str | None = None) -> TTRResult:
+ def compute_ttr(text: str, text_id: str | None = None, chunk_size: int = 1000) -> TTRResult:
      """
      Compute Type-Token Ratio (TTR) metrics for vocabulary richness.

@@ -22,6 +28,10 @@ def compute_ttr(text: str, text_id: str | None = None) -> TTRResult:
      - STTR: Standardized TTR across fixed-size chunks (reduces length bias)
      - Delta Std: Standard deviation of TTR across chunks (vocabulary consistency)

+     Related GitHub Issue:
+         #27 - Native chunked analysis with Distribution dataclass
+         https://github.com/craigtrim/pystylometry/issues/27
+
      References:
          Guiraud, P. (1960). Problèmes et méthodes de la statistique linguistique.
          Herdan, G. (1960). Type-token Mathematics: A Textbook of Mathematical
@@ -32,9 +42,14 @@ def compute_ttr(text: str, text_id: str | None = None) -> TTRResult:
      Args:
          text: Input text to analyze
          text_id: Optional identifier for the text (for tracking purposes)
+         chunk_size: Number of words per chunk (default: 1000).
+             Note: The stylometry-ttr package handles its own internal chunking,
+             so this parameter is included for API consistency but actual chunking
+             behavior is delegated to stylometry-ttr.

      Returns:
-         TTRResult with all TTR variants and metadata
+         TTRResult with all TTR variants and metadata, including Distribution
+         objects for stylometric fingerprinting.

      Example:
          >>> result = compute_ttr("The quick brown fox jumps over the lazy dog.")
@@ -63,17 +78,68 @@ def compute_ttr(text: str, text_id: str | None = None) -> TTRResult:
      # Note: stylometry-ttr requires text_id to be a string, not None
      ttr_result = _compute_ttr(text, text_id=text_id or "")

+     # Extract values, handling None for short texts
+     ttr_val = ttr_result.ttr
+     root_ttr_val = ttr_result.root_ttr
+     log_ttr_val = ttr_result.log_ttr
+     sttr_val = ttr_result.sttr if ttr_result.sttr is not None else 0.0
+     delta_std_val = ttr_result.delta_std if ttr_result.delta_std is not None else 0.0
+
+     # Create single-value distributions from stylometry-ttr results
+     # The stylometry-ttr package handles its own internal chunking for STTR
+     # so we wrap the aggregate results in Distribution objects
+     ttr_dist = (
+         make_distribution([ttr_val])
+         if ttr_val is not None
+         else Distribution(
+             values=[], mean=float("nan"), median=float("nan"), std=0.0, range=0.0, iqr=0.0
+         )
+     )
+     root_ttr_dist = (
+         make_distribution([root_ttr_val])
+         if root_ttr_val is not None
+         else Distribution(
+             values=[], mean=float("nan"), median=float("nan"), std=0.0, range=0.0, iqr=0.0
+         )
+     )
+     log_ttr_dist = (
+         make_distribution([log_ttr_val])
+         if log_ttr_val is not None
+         else Distribution(
+             values=[], mean=float("nan"), median=float("nan"), std=0.0, range=0.0, iqr=0.0
+         )
+     )
+     sttr_dist = (
+         make_distribution([sttr_val])
+         if ttr_result.sttr is not None
+         else Distribution(
+             values=[], mean=float("nan"), median=float("nan"), std=0.0, range=0.0, iqr=0.0
+         )
+     )
+     delta_std_dist = (
+         make_distribution([delta_std_val])
+         if ttr_result.delta_std is not None
+         else Distribution(
+             values=[], mean=float("nan"), median=float("nan"), std=0.0, range=0.0, iqr=0.0
+         )
+     )
+
      # Convert to our TTRResult dataclass
-     # The stylometry-ttr result has attributes we can access
-     # Some fields (sttr, delta_std) may be None for short texts
      return TTRResult(
          total_words=ttr_result.total_words,
          unique_words=ttr_result.unique_words,
-         ttr=ttr_result.ttr,
-         root_ttr=ttr_result.root_ttr,
-         log_ttr=ttr_result.log_ttr,
-         sttr=ttr_result.sttr if ttr_result.sttr is not None else 0.0,
-         delta_std=ttr_result.delta_std if ttr_result.delta_std is not None else 0.0,
+         ttr=ttr_val if ttr_val is not None else float("nan"),
+         root_ttr=root_ttr_val if root_ttr_val is not None else float("nan"),
+         log_ttr=log_ttr_val if log_ttr_val is not None else float("nan"),
+         sttr=sttr_val,
+         delta_std=delta_std_val,
+         ttr_dist=ttr_dist,
+         root_ttr_dist=root_ttr_dist,
+         log_ttr_dist=log_ttr_dist,
+         sttr_dist=sttr_dist,
+         delta_std_dist=delta_std_dist,
+         chunk_size=chunk_size,
+         chunk_count=1,  # stylometry-ttr returns aggregate results
          metadata={
              "text_id": text_id or "",
              "source": "stylometry-ttr",