pystylometry 1.0.0-py3-none-any.whl → 1.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pystylometry/__init__.py +29 -3
- pystylometry/_types.py +963 -259
- pystylometry/authorship/__init__.py +23 -2
- pystylometry/authorship/additional_methods.py +4 -29
- pystylometry/authorship/kilgarriff.py +347 -0
- pystylometry/character/character_metrics.py +267 -179
- pystylometry/cli.py +427 -0
- pystylometry/consistency/__init__.py +57 -0
- pystylometry/consistency/_thresholds.py +162 -0
- pystylometry/consistency/drift.py +549 -0
- pystylometry/dialect/__init__.py +65 -0
- pystylometry/dialect/_data/dialect_markers.json +1134 -0
- pystylometry/dialect/_loader.py +360 -0
- pystylometry/dialect/detector.py +533 -0
- pystylometry/lexical/advanced_diversity.py +61 -22
- pystylometry/lexical/function_words.py +255 -56
- pystylometry/lexical/hapax.py +182 -52
- pystylometry/lexical/mtld.py +108 -26
- pystylometry/lexical/ttr.py +76 -10
- pystylometry/lexical/word_frequency_sophistication.py +1522 -298
- pystylometry/lexical/yule.py +136 -50
- pystylometry/ngrams/entropy.py +150 -49
- pystylometry/readability/additional_formulas.py +1887 -762
- pystylometry/readability/ari.py +144 -82
- pystylometry/readability/coleman_liau.py +136 -109
- pystylometry/readability/flesch.py +177 -73
- pystylometry/readability/gunning_fog.py +165 -161
- pystylometry/readability/smog.py +123 -42
- pystylometry/syntactic/advanced_syntactic.py +76 -14
- pystylometry/syntactic/pos_ratios.py +70 -6
- pystylometry/syntactic/sentence_stats.py +55 -12
- pystylometry/syntactic/sentence_types.py +71 -15
- pystylometry/viz/__init__.py +71 -0
- pystylometry/viz/drift.py +589 -0
- pystylometry/viz/jsx/__init__.py +31 -0
- pystylometry/viz/jsx/_base.py +144 -0
- pystylometry/viz/jsx/report.py +677 -0
- pystylometry/viz/jsx/timeline.py +716 -0
- pystylometry/viz/jsx/viewer.py +1032 -0
- {pystylometry-1.0.0.dist-info → pystylometry-1.1.0.dist-info}/METADATA +5 -2
- pystylometry-1.1.0.dist-info/RECORD +63 -0
- {pystylometry-1.0.0.dist-info → pystylometry-1.1.0.dist-info}/WHEEL +1 -1
- pystylometry-1.1.0.dist-info/entry_points.txt +4 -0
- pystylometry-1.0.0.dist-info/RECORD +0 -46
pystylometry/readability/gunning_fog.py
CHANGED
@@ -3,6 +3,12 @@
 This module computes the Gunning Fog Index, a readability metric that
 estimates the years of formal education needed to understand text on first reading.

+This implementation includes native chunked analysis for stylometric fingerprinting.
+
+Related GitHub Issues:
+    #4 - NLP-enhanced complex word detection
+    #27 - Native chunked analysis with Distribution dataclass
+
 Historical Background:
 ----------------------
 The Gunning Fog Index was developed by Robert Gunning in 1952 as part of his
@@ -12,221 +18,219 @@ a U.S. grade-level score (e.g., 12 = high school senior reading level).
 Reference:
     Gunning, R. (1952). The Technique of Clear Writing.
     McGraw-Hill, New York.
-
-Implementation Notes (PR #4):
-------------------------------
-This implementation addresses issues raised in GitHub PR #4:
-https://github.com/craigtrim/pystylometry/pull/4
-
-The original TODO implementation used simple syllable counting without proper
-exclusions for proper nouns, compounds, or inflections. This NLP-enhanced
-version uses the complex_words module for accurate detection via:
-
-1. spaCy POS tagging for proper noun detection (enhanced mode)
-2. spaCy lemmatization for morphological analysis (enhanced mode)
-3. Component-based analysis for hyphenated words (both modes)
-4. Graceful fallback to heuristics when spaCy unavailable (basic mode)
-
-See complex_words.py for detailed rationale and implementation.
 """

+import math
+
 from .._normalize import normalize_for_readability
-from .._types import GunningFogResult
+from .._types import Distribution, GunningFogResult, chunk_text, make_distribution
 from .._utils import split_sentences, tokenize
-
-# Import NLP-enhanced complex word detection module
-# This module addresses PR #4 issues with proper noun and inflection detection
 from .complex_words import process_text_for_complex_words

 # Formula coefficient from Gunning (1952)
-# Reference: Gunning, R. (1952). The Technique of Clear Writing. McGraw-Hill.
-# The 0.4 coefficient scales the combined complexity measure to approximate grade level
 _FOG_COEFFICIENT = 0.4


-def
+def _compute_gunning_fog_single(text: str, spacy_model: str) -> tuple[float, float, dict]:
+    """Compute Gunning Fog metrics for a single chunk of text.
+
+    Returns:
+        Tuple of (fog_index, grade_level, metadata_dict).
+        Returns (nan, nan, metadata) for empty/invalid input.
+    """
+    sentences = split_sentences(text)
+    all_tokens = tokenize(text)
+    tokens = normalize_for_readability(all_tokens)
+
+    if len(sentences) == 0 or len(tokens) == 0:
+        return (
+            float("nan"),
+            float("nan"),
+            {
+                "sentence_count": 0,
+                "word_count": 0,
+                "complex_word_count": 0,
+                "complex_word_percentage": 0.0,
+            },
+        )
+
+    # Count complex words using NLP-enhanced detection
+    complex_word_count, detection_metadata = process_text_for_complex_words(
+        text, tokens, model=spacy_model
+    )
+
+    # Calculate formula components
+    average_words_per_sentence = len(tokens) / len(sentences)
+    complex_word_percentage = (complex_word_count / len(tokens)) * 100
+
+    # Apply Gunning Fog formula
+    fog_index = _FOG_COEFFICIENT * (average_words_per_sentence + complex_word_percentage)
+    grade_level = max(0, min(20, round(fog_index)))
+
+    metadata = {
+        "sentence_count": len(sentences),
+        "word_count": len(tokens),
+        "complex_word_count": complex_word_count,
+        "complex_word_percentage": complex_word_percentage,
+        "average_words_per_sentence": average_words_per_sentence,
+        **detection_metadata,
+    }
+
+    return (fog_index, float(grade_level), metadata)
+
+
+def compute_gunning_fog(
+    text: str, chunk_size: int = 1000, spacy_model: str = "en_core_web_sm"
+) -> GunningFogResult:
     """
     Compute Gunning Fog Index with NLP-enhanced complex word detection.

-
-
-    lexical complexity (polysyllabic words) into a single grade-level score.
+    This function uses native chunked analysis to capture variance and patterns
+    across the text, which is essential for stylometric fingerprinting.

     Formula (Gunning, 1952):
     ------------------------
     Fog Index = 0.4 × [(words/sentences) + 100 × (complex words/words)]

-    Where:
-    - words/sentences = Average Sentence Length (ASL)
-    - complex words/words = Percentage of Hard Words (PHW)
-    - 0.4 = Scaling coefficient to approximate U.S. grade levels
-
-    The resulting score represents a U.S. education grade level:
-    - 6 = Sixth grade (age 11-12)
-    - 12 = High school senior (age 17-18)
-    - 17+ = College graduate level
-
-    Complex Words Definition (Gunning, 1952):
-    ------------------------------------------
-    Words with 3+ syllables, EXCLUDING:
+    Where complex words are words with 3+ syllables, EXCLUDING:
     1. Proper nouns (names, places, organizations)
     2. Compound words (hyphenated)
     3. Common verb forms (-es, -ed, -ing endings)

+    Related GitHub Issues:
+        #4 - NLP-enhanced complex word detection
+        #27 - Native chunked analysis with Distribution dataclass
+
     Reference:
         Gunning, R. (1952). The Technique of Clear Writing. McGraw-Hill.
-        Pages 38-39: Complex word criteria
-
-    NLP Enhancement (PR #4):
-    ------------------------
-    This implementation addresses issues in GitHub PR #4:
-    https://github.com/craigtrim/pystylometry/pull/4
-
-    **Enhanced Mode** (when spaCy available):
-    - Uses POS tagging (PROPN) for proper noun detection
-    - Uses lemmatization for morphological analysis
-    - Analyzes hyphenated word components individually
-    - More accurate, handles edge cases (acronyms, irregular verbs)
-
-    **Basic Mode** (when spaCy unavailable):
-    - Uses capitalization heuristic for proper nouns
-    - Uses simple suffix stripping for inflections
-    - Analyzes hyphenated word components individually
-    - Less accurate but requires no external dependencies
-
-    The mode used is reported in metadata for transparency.

     Args:
         text: Input text to analyze
+        chunk_size: Number of words per chunk (default: 1000).
+            The text is divided into chunks of this size, and metrics are
+            computed per-chunk.
         spacy_model: spaCy model name for enhanced mode (default: "en_core_web_sm")
-            Requires model download: python -m spacy download en_core_web_sm
-            Other options: "en_core_web_md", "en_core_web_lg"

     Returns:
         GunningFogResult with:
-        - fog_index:
-        - grade_level:
-        -
-
-
-
-        - complex_word_percentage: Percentage of complex words
-        - average_words_per_sentence: Mean sentence length
-        - reliable: Boolean, True if word_count >= 100 and sentence_count >= 3
-        - mode: "enhanced" (spaCy) or "basic" (heuristics)
-        - proper_noun_detection: Detection method used
-        - inflection_handling: Inflection analysis method used
-        - spacy_model: Model name if enhanced mode (else absent)
+        - fog_index: Mean Fog Index across chunks
+        - grade_level: Mean grade level across chunks
+        - fog_index_dist: Distribution with per-chunk values and stats
+        - grade_level_dist: Distribution with per-chunk values and stats
+        - chunk_size: The chunk size used
+        - chunk_count: Number of chunks analyzed

     Example:
-        >>>
-        >>> result
-
-
-
-        Grade Level: 3
-        >>> print(f"Mode: {result.metadata['mode']}")
-        Mode: enhanced
-
-        >>> # Complex academic text (high complexity)
-        >>> text = "Understanding phenomenological hermeneutics necessitates comprehensive study."
-        >>> result = compute_gunning_fog(text)
-        >>> print(f"Fog Index: {result.fog_index:.1f}")
-        Fog Index: 23.6
-        >>> print(f"Grade Level: {result.grade_level}")
-        Grade Level: 20
-
-        >>> # Check which detection mode was used
-        >>> if result.metadata['mode'] == 'enhanced':
-        ...     print("Using spaCy NLP features")
-        Using spaCy NLP features
-
-    Notes:
-        - Empty text returns fog_index=NaN and grade_level=NaN (no data)
-        - Grade levels are clamped to [0, 20] range for valid input
-        - For short texts (< 100 words), results may be unreliable
-        - Gunning (1952) recommends analyzing samples of 100+ words
+        >>> result = compute_gunning_fog("Long text here...", chunk_size=1000)
+        >>> result.fog_index  # Mean across chunks
+        12.5
+        >>> result.fog_index_dist.std  # Variance reveals fingerprint
+        2.1
     """
-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # Chunk the text
+    chunks = chunk_text(text, chunk_size)
+
+    # Compute metrics per chunk
+    fog_values = []
+    grade_values = []
+    total_sentences = 0
+    total_words = 0
+    total_complex = 0
+    detection_metadata: dict = {}
+
+    for chunk in chunks:
+        fi, gl, meta = _compute_gunning_fog_single(chunk, spacy_model)
+        if not math.isnan(fi):
+            fog_values.append(fi)
+            grade_values.append(gl)
+            total_sentences += meta.get("sentence_count", 0)
+            total_words += meta.get("word_count", 0)
+            total_complex += meta.get("complex_word_count", 0)
+            # Capture detection metadata from first chunk (same for all chunks)
+            if not detection_metadata and "mode" in meta:
+                detection_metadata = {
+                    "mode": meta.get("mode"),
+                    "proper_noun_detection": meta.get("proper_noun_detection"),
+                    "inflection_handling": meta.get("inflection_handling"),
+                }
+                if "spacy_model" in meta:
+                    detection_metadata["spacy_model"] = meta.get("spacy_model")
+
+    # Handle empty or all-invalid chunks
+    if not fog_values:
+        empty_dist = Distribution(
+            values=[],
+            mean=float("nan"),
+            median=float("nan"),
+            std=0.0,
+            range=0.0,
+            iqr=0.0,
+        )
         return GunningFogResult(
             fog_index=float("nan"),
             grade_level=float("nan"),
+            fog_index_dist=empty_dist,
+            grade_level_dist=empty_dist,
+            chunk_size=chunk_size,
+            chunk_count=len(chunks),
             metadata={
+                # Backward-compatible keys
                 "sentence_count": 0,
                 "word_count": 0,
                 "complex_word_count": 0,
                 "complex_word_percentage": 0.0,
                 "average_words_per_sentence": 0.0,
+                # New prefixed keys for consistency
+                "total_sentence_count": 0,
+                "total_word_count": 0,
+                "total_complex_word_count": 0,
                 "reliable": False,
+                # Detection metadata
                 "mode": "none",
-                "proper_noun_detection": "
-                "inflection_handling": "
+                "proper_noun_detection": "none",
+                "inflection_handling": "none",
            },
        )

-    #
-
-
-    complex_word_count, detection_metadata = process_text_for_complex_words(
-        text, tokens, model=spacy_model
-    )
-
-    # Step 3: Calculate formula components
-    # Reference: Gunning (1952), p. 40: "The Fog Index formula"
+    # Build distributions
+    fog_dist = make_distribution(fog_values)
+    grade_dist = make_distribution(grade_values)

-    #
-
-    average_words_per_sentence = len(tokens) / len(sentences)
-
-    # Percentage of Hard Words (PHW)
-    # Number of complex words divided by total words, multiplied by 100
-    complex_word_percentage = (complex_word_count / len(tokens)) * 100
-
-    # Step 4: Apply Gunning Fog formula
-    # Fog = 0.4 × (ASL + PHW)
-    # The 0.4 coefficient scales the result to approximate U.S. grade levels
-    fog_index = _FOG_COEFFICIENT * (average_words_per_sentence + complex_word_percentage)
-
-    # Step 5: Convert to grade level
-    # Round to nearest integer using standard rounding (round half to even)
-    # Clamp to reasonable range [0, 20] to prevent extreme values
-    # Note: Texts with fog_index > 20 are considered "post-graduate" level
-    grade_level = max(0, min(20, round(fog_index)))
+    # Reliability heuristic
+    reliable = total_words >= 100 and total_sentences >= 3

-    #
-
-
-
+    # Ensure detection metadata has defaults
+    if not detection_metadata:
+        detection_metadata = {
+            "mode": "none",
+            "proper_noun_detection": "none",
+            "inflection_handling": "none",
+        }

-    # Step 6: Assemble result with comprehensive metadata
     return GunningFogResult(
-        fog_index=
-        grade_level=
+        fog_index=fog_dist.mean,
+        grade_level=grade_dist.mean,
+        fog_index_dist=fog_dist,
+        grade_level_dist=grade_dist,
+        chunk_size=chunk_size,
+        chunk_count=len(chunks),
         metadata={
-            #
-            "sentence_count":
-            "word_count":
-            "complex_word_count":
-
-
-
-
+            # Backward-compatible keys
+            "sentence_count": total_sentences,
+            "word_count": total_words,
+            "complex_word_count": total_complex,
+            "complex_word_percentage": (total_complex / total_words * 100)
+            if total_words > 0
+            else 0,
+            "average_words_per_sentence": total_words / total_sentences
+            if total_sentences > 0
+            else 0,
+            # New prefixed keys for consistency
+            "total_sentence_count": total_sentences,
+            "total_word_count": total_words,
+            "total_complex_word_count": total_complex,
             "reliable": reliable,
-            # Detection
-            # This allows users to verify which mode was used
+            # Detection metadata
             **detection_metadata,
         },
     )
pystylometry/readability/smog.py
CHANGED
@@ -1,17 +1,62 @@
-"""SMOG (Simple Measure of Gobbledygook) Index.
+"""SMOG (Simple Measure of Gobbledygook) Index.
+
+This module implements the SMOG readability formula with native chunked
+analysis for stylometric fingerprinting.
+
+Related GitHub Issue:
+    #27 - Native chunked analysis with Distribution dataclass
+    https://github.com/craigtrim/pystylometry/issues/27
+"""

 import math

 from .._normalize import normalize_for_readability
-from .._types import SMOGResult
+from .._types import Distribution, SMOGResult, chunk_text, make_distribution
 from .._utils import split_sentences, tokenize
 from .syllables import count_syllables


-def
+def _compute_smog_single(text: str) -> tuple[float, float, dict]:
+    """Compute SMOG metrics for a single chunk of text.
+
+    Returns:
+        Tuple of (smog_index, grade_level, metadata_dict).
+        Returns (nan, nan, metadata) for empty/invalid input.
+    """
+    sentences = split_sentences(text)
+    tokens = tokenize(text)
+    word_tokens = normalize_for_readability(tokens)
+
+    if len(sentences) == 0 or len(word_tokens) == 0:
+        return (
+            float("nan"),
+            float("nan"),
+            {"sentence_count": 0, "word_count": 0, "polysyllable_count": 0},
+        )
+
+    # Count polysyllables (words with 3+ syllables)
+    polysyllable_count = sum(1 for word in word_tokens if count_syllables(word) >= 3)
+
+    # SMOG formula
+    smog_index = 1.043 * math.sqrt(polysyllable_count * 30 / len(sentences)) + 3.1291
+    grade_level = max(0, min(20, math.floor(smog_index + 0.5)))
+
+    metadata = {
+        "sentence_count": len(sentences),
+        "word_count": len(word_tokens),
+        "polysyllable_count": polysyllable_count,
+    }
+
+    return (smog_index, float(grade_level), metadata)
+
+
+def compute_smog(text: str, chunk_size: int = 1000) -> SMOGResult:
     """
     Compute SMOG (Simple Measure of Gobbledygook) Index.

+    This function uses native chunked analysis to capture variance and patterns
+    across the text, which is essential for stylometric fingerprinting.
+
     Formula:
         SMOG = 1.043 × √(polysyllables × 30/sentences) + 3.1291

@@ -20,69 +65,105 @@ def compute_smog(text: str) -> SMOGResult:
     The SMOG index estimates the years of education needed to understand the text.
     It's particularly useful for healthcare materials.

+    Related GitHub Issue:
+        #27 - Native chunked analysis with Distribution dataclass
+        https://github.com/craigtrim/pystylometry/issues/27
+
     References:
         McLaughlin, G. H. (1969). SMOG grading: A new readability formula.
         Journal of Reading, 12(8), 639-646.

     Args:
         text: Input text to analyze
+        chunk_size: Number of words per chunk (default: 1000).
+            The text is divided into chunks of this size, and metrics are
+            computed per-chunk.

     Returns:
-        SMOGResult with
-
-
-
-
-
-
+        SMOGResult with:
+        - smog_index: Mean SMOG index across chunks
+        - grade_level: Mean grade level across chunks
+        - smog_index_dist: Distribution with per-chunk values and stats
+        - grade_level_dist: Distribution with per-chunk values and stats
+        - chunk_size: The chunk size used
+        - chunk_count: Number of chunks analyzed

     Example:
-        >>>
-        >>> result
-
-        >>>
+        >>> result = compute_smog("Long text here...", chunk_size=1000)
+        >>> result.smog_index  # Mean across chunks
+        12.5
+        >>> result.smog_index_dist.std  # Variance reveals fingerprint
+        1.8
     """
-
-
-
-    #
-
-
-
-
+    # Chunk the text
+    chunks = chunk_text(text, chunk_size)
+
+    # Compute metrics per chunk
+    smog_values = []
+    grade_values = []
+    total_sentences = 0
+    total_words = 0
+    total_polysyllables = 0
+
+    for chunk in chunks:
+        si, gl, meta = _compute_smog_single(chunk)
+        if not math.isnan(si):
+            smog_values.append(si)
+            grade_values.append(gl)
+            total_sentences += meta.get("sentence_count", 0)
+            total_words += meta.get("word_count", 0)
+            total_polysyllables += meta.get("polysyllable_count", 0)
+
+    # Handle empty or all-invalid chunks
+    if not smog_values:
+        empty_dist = Distribution(
+            values=[],
+            mean=float("nan"),
+            median=float("nan"),
+            std=0.0,
+            range=0.0,
+            iqr=0.0,
+        )
         return SMOGResult(
             smog_index=float("nan"),
             grade_level=float("nan"),
+            smog_index_dist=empty_dist,
+            grade_level_dist=empty_dist,
+            chunk_size=chunk_size,
+            chunk_count=len(chunks),
             metadata={
+                # Backward-compatible keys
                 "sentence_count": 0,
                 "word_count": 0,
                 "polysyllable_count": 0,
+                # New prefixed keys for consistency
+                "total_sentence_count": 0,
+                "total_word_count": 0,
+                "total_polysyllable_count": 0,
                 "warning": "Insufficient text",
             },
         )

-    #
-
-
-    # SMOG formula: 1.043 × √(polysyllables × 30/sentences) + 3.1291
-    smog_index = 1.043 * math.sqrt(polysyllable_count * 30 / len(sentences)) + 3.1291
-
-    # Use round-half-up rounding (not banker's rounding)
-    # Clamp to valid grade range [0, 20]
-    # Round half up: 4.5 → 5 (not Python's default round-half-to-even)
-    # math.floor(x + 0.5) implements round-half-up for both positive and negative values
-    # Lower bound: Prevent negative grades
-    # (though mathematically unlikely with SMOG's +3.1291 constant)
-    # Upper bound: Cap at grade 20 (post-graduate) for extreme complexity
-    grade_level = max(0, min(20, math.floor(smog_index + 0.5)))
+    # Build distributions
+    smog_dist = make_distribution(smog_values)
+    grade_dist = make_distribution(grade_values)

     return SMOGResult(
-        smog_index=
-        grade_level=
+        smog_index=smog_dist.mean,
+        grade_level=grade_dist.mean,
+        smog_index_dist=smog_dist,
+        grade_level_dist=grade_dist,
+        chunk_size=chunk_size,
+        chunk_count=len(chunks),
         metadata={
-
-            "
-            "
-            "
+            # Backward-compatible keys
+            "sentence_count": total_sentences,
+            "word_count": total_words,
+            "polysyllable_count": total_polysyllables,
+            # New prefixed keys for consistency
+            "total_sentence_count": total_sentences,
+            "total_word_count": total_words,
+            "total_polysyllable_count": total_polysyllables,
+            "warning": "Less than 30 sentences" if total_sentences < 30 else None,
         },
     )