pystylometry 1.3.0__py3-none-any.whl → 1.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pystylometry/__init__.py +42 -3
- pystylometry/_types.py +205 -3
- pystylometry/cli.py +321 -0
- pystylometry/lexical/__init__.py +5 -1
- pystylometry/lexical/repetition.py +506 -0
- pystylometry/lexical/ttr.py +288 -97
- pystylometry-1.3.5.dist-info/LICENSE +21 -0
- pystylometry-1.3.5.dist-info/METADATA +78 -0
- {pystylometry-1.3.0.dist-info → pystylometry-1.3.5.dist-info}/RECORD +11 -9
- {pystylometry-1.3.0.dist-info → pystylometry-1.3.5.dist-info}/WHEEL +1 -1
- {pystylometry-1.3.0.dist-info → pystylometry-1.3.5.dist-info}/entry_points.txt +1 -0
- pystylometry-1.3.0.dist-info/METADATA +0 -136
pystylometry/lexical/ttr.py
CHANGED

@@ -1,149 +1,340 @@
-"""Type-Token Ratio (TTR) analysis
+"""Type-Token Ratio (TTR) analysis with native chunked computation.
 
-
-
+Computes multiple TTR variants for measuring lexical diversity (vocabulary
+richness). All metrics are computed per-chunk and wrapped in Distribution
+objects for stylometric fingerprinting.
 
-
+Previously delegated to the external ``stylometry-ttr`` package; now
+computed inline using only the Python standard library (``math`` and
+``statistics``).
+
+Related GitHub Issues:
     #27 - Native chunked analysis with Distribution dataclass
     https://github.com/craigtrim/pystylometry/issues/27
+
+    #43 - Inline stylometry-ttr into pystylometry (remove external dependency)
+    https://github.com/craigtrim/pystylometry/issues/43
+
+References:
+    Guiraud, P. (1960). Problèmes et méthodes de la statistique linguistique.
+    Herdan, G. (1960). Type-token Mathematics: A Textbook of Mathematical
+        Linguistics. Mouton.
+    Johnson, W. (1944). Studies in language behavior: I. A program of research.
+        Psychological Monographs, 56(2), 1-15.
 """
 
 from __future__ import annotations
 
-
+import math
+import statistics
+from typing import Optional
+
+from .._types import Distribution, TTRAggregateResult, TTRResult, make_distribution
+from ..tokenizer import Tokenizer
+
+# ---------------------------------------------------------------------------
+# Internal helpers
+# ---------------------------------------------------------------------------
+
+# Minimum words required before STTR computation is meaningful.
+# With fewer words we cannot form at least two full chunks, so the
+# standardised metric would be unreliable.
+_MIN_WORDS_FOR_STTR = 2000
+
+
+def _compute_chunk_ttrs(tokens: list[str], chunk_size: int) -> list[float]:
+    """Compute per-chunk raw TTR values for non-overlapping chunks.
+
+    Only full-sized chunks are included so that every TTR is measured on the
+    same token count, keeping the standardised metric unbiased.
+
+    Args:
+        tokens: Full token list.
+        chunk_size: Number of tokens per chunk.
+
+    Returns:
+        List of per-chunk TTR values (may be empty if too few tokens).
+    """
+    total = len(tokens)
+    chunk_ttrs: list[float] = []
+    for i in range(0, total - chunk_size + 1, chunk_size):
+        chunk = tokens[i : i + chunk_size]
+        chunk_ttrs.append(len(set(chunk)) / chunk_size)
+    return chunk_ttrs
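
The loop above is the heart of the chunked design: only full `chunk_size` windows count, and trailing tokens are discarded. A standalone sketch with a toy token list (hypothetical data, not pystylometry's Tokenizer output) shows the effect:

```python
# Standalone sketch of the non-overlapping chunking above (toy data).
tokens = ["the", "cat", "sat", "on", "the", "mat", "the", "cat", "ran", "off"]
chunk_size = 4

chunk_ttrs = []
for i in range(0, len(tokens) - chunk_size + 1, chunk_size):
    chunk = tokens[i : i + chunk_size]
    chunk_ttrs.append(len(set(chunk)) / chunk_size)

# Chunk 1 = ["the", "cat", "sat", "on"]  -> 4 unique / 4 = 1.0
# Chunk 2 = ["the", "mat", "the", "cat"] -> 3 unique / 4 = 0.75
# "ran" and "off" never fill a third chunk, so they are dropped.
print(chunk_ttrs)  # [1.0, 0.75]
```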
+
 
+def _compute_deltas(
+    chunk_ttrs: list[float],
+) -> tuple[Optional[float], Optional[float], Optional[float], Optional[float]]:
+    """Compute delta metrics: TTR(n) - TTR(n-1) for consecutive chunks.
 
-
+    Delta metrics capture chunk-to-chunk vocabulary variability:
+        - delta_mean: average change (positive = expanding vocabulary)
+        - delta_std: volatility of change (stylometric fingerprint)
+        - delta_min: largest negative swing
+        - delta_max: largest positive swing
+
+    Args:
+        chunk_ttrs: Per-chunk TTR values (needs >= 2 values).
+
+    Returns:
+        Tuple of (delta_mean, delta_std, delta_min, delta_max).
+        All ``None`` when fewer than 2 chunks are available.
     """
-
+    if len(chunk_ttrs) < 2:
+        return None, None, None, None
+
+    deltas = [chunk_ttrs[i] - chunk_ttrs[i - 1] for i in range(1, len(chunk_ttrs))]
+    d_mean = statistics.mean(deltas)
+    d_std = statistics.stdev(deltas) if len(deltas) > 1 else 0.0
+    return d_mean, d_std, min(deltas), max(deltas)
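
A worked pass through the delta logic, using three hypothetical chunk TTRs to make the four outputs concrete:

```python
import statistics

# Hypothetical per-chunk TTRs for illustration only.
chunk_ttrs = [0.50, 0.62, 0.55]

deltas = [chunk_ttrs[i] - chunk_ttrs[i - 1] for i in range(1, len(chunk_ttrs))]
# deltas ~= [0.12, -0.07]: one expansion, one contraction

d_mean = statistics.mean(deltas)  # ~0.025 (slightly expanding vocabulary)
d_std = statistics.stdev(deltas)  # ~0.134 (volatility between chunks)
print(d_mean, d_std, min(deltas), max(deltas))
```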
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
 
-
-
-
+def compute_ttr(
+    text: str,
+    text_id: str | None = None,
+    chunk_size: int = 1000,
+) -> TTRResult:
+    """Compute Type-Token Ratio (TTR) metrics for vocabulary richness.
+
+    Tokenises the input with pystylometry's ``Tokenizer`` (lowercase, words
+    only), then computes five TTR-family metrics. Each metric is computed
+    per-chunk and the full per-chunk distribution is exposed via a
+    ``Distribution`` object for stylometric fingerprinting.
 
     Metrics computed:
-
-
-
-
-
+        - **Raw TTR**: ``unique_words / total_words``
+        - **Root TTR** (Guiraud's index): ``unique_words / sqrt(total_words)``
+        - **Log TTR** (Herdan's C): ``log(unique_words) / log(total_words)``
+        - **STTR**: Mean TTR across fixed-size chunks (reduces length bias).
+          Only computed when the text has >= 2000 words.
+        - **Delta Std**: Std-dev of chunk-to-chunk TTR change (vocabulary
+          consistency). Only computed when >= 2 chunks are available.
 
-    Related GitHub
+    Related GitHub Issues:
         #27 - Native chunked analysis with Distribution dataclass
         https://github.com/craigtrim/pystylometry/issues/27
 
+        #43 - Inline stylometry-ttr into pystylometry
+        https://github.com/craigtrim/pystylometry/issues/43
+
     References:
-        Guiraud, P. (1960). Problèmes et méthodes de la statistique
+        Guiraud, P. (1960). Problèmes et méthodes de la statistique
+            linguistique.
         Herdan, G. (1960). Type-token Mathematics: A Textbook of Mathematical
             Linguistics. Mouton.
-        Johnson, W. (1944). Studies in language behavior: I. A program of
-            Psychological Monographs, 56(2), 1-15.
+        Johnson, W. (1944). Studies in language behavior: I. A program of
+            research. Psychological Monographs, 56(2), 1-15.
 
     Args:
-        text: Input text to
-        text_id: Optional identifier for the text (
-        chunk_size: Number of words per chunk
-
-            so this parameter is included for API consistency but actual chunking
-            behavior is delegated to stylometry-ttr.
+        text: Input text to analyse.
+        text_id: Optional identifier for the text (stored in metadata).
+        chunk_size: Number of words per chunk for STTR and per-chunk
+            distributions (default: 1000).
 
     Returns:
-        TTRResult with all TTR variants
-        objects for stylometric fingerprinting.
+        TTRResult with all TTR variants, Distribution objects, and metadata.
 
     Example:
         >>> result = compute_ttr("The quick brown fox jumps over the lazy dog.")
         >>> print(f"Raw TTR: {result.ttr:.3f}")
-        Raw TTR:
+        Raw TTR: 1.000
         >>> print(f"Root TTR: {result.root_ttr:.3f}")
-        Root TTR:
-        >>> print(f"STTR: {result.sttr:.3f}")
-        STTR: 1.000
+        Root TTR: 3.000
 
         >>> # With text identifier
         >>> result = compute_ttr("Sample text here.", text_id="sample-001")
         >>> print(result.metadata["text_id"])
         sample-001
     """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    root_ttr_val = ttr_result.root_ttr
-    log_ttr_val = ttr_result.log_ttr
-    sttr_val = ttr_result.sttr if ttr_result.sttr is not None else 0.0
-    delta_std_val = ttr_result.delta_std if ttr_result.delta_std is not None else 0.0
-
-    # Create single-value distributions from stylometry-ttr results
-    # The stylometry-ttr package handles its own internal chunking for STTR
-    # so we wrap the aggregate results in Distribution objects
-    ttr_dist = (
-        make_distribution([ttr_val])
-        if ttr_val is not None
-        else Distribution(
-            values=[], mean=float("nan"), median=float("nan"), std=0.0, range=0.0, iqr=0.0
+    # Tokenise using pystylometry's own tokenizer (lowercase, words only)
+    tokenizer = Tokenizer(lowercase=True, strip_punctuation=True)
+    tokens = tokenizer.tokenize(text)
+
+    total_words = len(tokens)
+
+    # --- empty / trivial text --------------------------------------------------
+    if total_words == 0:
+        empty_dist = Distribution(
+            values=[],
+            mean=float("nan"),
+            median=float("nan"),
+            std=0.0,
+            range=0.0,
+            iqr=0.0,
         )
-
-
-
-
-
-
+        return TTRResult(
+            total_words=0,
+            unique_words=0,
+            ttr=0.0,
+            root_ttr=0.0,
+            log_ttr=0.0,
+            sttr=0.0,
+            delta_std=0.0,
+            ttr_dist=empty_dist,
+            root_ttr_dist=empty_dist,
+            log_ttr_dist=empty_dist,
+            sttr_dist=empty_dist,
+            delta_std_dist=empty_dist,
+            chunk_size=chunk_size,
+            chunk_count=0,
+            metadata={
+                "text_id": text_id or "",
+                "sttr_available": False,
+                "delta_std_available": False,
+            },
         )
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+    # --- global metrics --------------------------------------------------------
+    unique_words = len(set(tokens))
+    ttr_val = unique_words / total_words
+    root_ttr_val = unique_words / math.sqrt(total_words)
+    log_ttr_val = math.log(unique_words) / math.log(total_words) if total_words > 1 else 0.0
+
+    # --- per-chunk metrics -----------------------------------------------------
+    chunk_ttrs = _compute_chunk_ttrs(tokens, chunk_size)
+    chunk_count = len(chunk_ttrs)
+
+    # STTR: mean TTR across chunks (only meaningful with enough text)
+    sttr_available = total_words >= _MIN_WORDS_FOR_STTR and chunk_count >= 1
+    if sttr_available:
+        sttr_val = statistics.mean(chunk_ttrs)
+    else:
+        sttr_val = 0.0
+
+    # Delta metrics
+    delta_mean, delta_std_val, delta_min, delta_max = _compute_deltas(chunk_ttrs)
+    delta_std_available = delta_std_val is not None
+    if delta_std_val is None:
+        delta_std_val = 0.0
+
+    # --- build Distribution objects --------------------------------------------
+    # For per-chunk distributions: compute root_ttr and log_ttr per chunk as well
+    if chunk_count >= 1:
+        ttr_dist = make_distribution(chunk_ttrs)
+
+        # Root TTR per chunk: for each chunk of chunk_size tokens,
+        # root_ttr = unique / sqrt(chunk_size)
+        root_ttr_chunks = [
+            len(set(tokens[i : i + chunk_size])) / math.sqrt(chunk_size)
+            for i in range(0, total_words - chunk_size + 1, chunk_size)
+        ]
+        root_ttr_dist = make_distribution(root_ttr_chunks)
+
+        # Log TTR per chunk
+        log_ttr_chunks = []
+        for i in range(0, total_words - chunk_size + 1, chunk_size):
+            chunk = tokens[i : i + chunk_size]
+            u = len(set(chunk))
+            t = len(chunk)
+            val = math.log(u) / math.log(t) if t > 1 else 0.0
+            log_ttr_chunks.append(val)
+        log_ttr_dist = make_distribution(log_ttr_chunks)
+
+        sttr_dist = (
+            make_distribution(chunk_ttrs) if sttr_available else make_distribution([sttr_val])
         )
-
-
-        make_distribution([delta_std_val])
-        if ttr_result.delta_std is not None
-        else Distribution(
-            values=[], mean=float("nan"), median=float("nan"), std=0.0, range=0.0, iqr=0.0
+        delta_std_dist = (
+            make_distribution([delta_std_val]) if delta_std_available else make_distribution([0.0])
         )
-
+    else:
+        # Not enough text for any chunks — wrap globals in single-value dists
+        ttr_dist = make_distribution([ttr_val])
+        root_ttr_dist = make_distribution([root_ttr_val])
+        log_ttr_dist = make_distribution([log_ttr_val])
+        sttr_dist = make_distribution([sttr_val])
+        delta_std_dist = make_distribution([0.0])
 
-    # Convert to our TTRResult dataclass
     return TTRResult(
-        total_words=
-        unique_words=
-        ttr=ttr_val
-        root_ttr=root_ttr_val
-        log_ttr=log_ttr_val
-        sttr=sttr_val,
-        delta_std=delta_std_val,
+        total_words=total_words,
+        unique_words=unique_words,
+        ttr=round(ttr_val, 6),
+        root_ttr=round(root_ttr_val, 4),
+        log_ttr=round(log_ttr_val, 6),
+        sttr=round(sttr_val, 6),
+        delta_std=round(delta_std_val, 6),
         ttr_dist=ttr_dist,
        root_ttr_dist=root_ttr_dist,
        log_ttr_dist=log_ttr_dist,
        sttr_dist=sttr_dist,
        delta_std_dist=delta_std_dist,
        chunk_size=chunk_size,
-        chunk_count=
+        chunk_count=chunk_count if chunk_count >= 1 else 1,
        metadata={
            "text_id": text_id or "",
-            "
-            "
-            "delta_std_available": ttr_result.delta_std is not None,
+            "sttr_available": sttr_available,
+            "delta_std_available": delta_std_available,
        },
    )
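
One behavioural consequence of `_MIN_WORDS_FOR_STTR` worth noting: below 2000 words, `sttr` holds the sentinel 0.0 and only `metadata["sttr_available"]` distinguishes "not computed" from a genuine value. A small usage sketch, assuming the 1.3.5 API exactly as shown in this diff:

```python
from pystylometry.lexical import compute_ttr

short = compute_ttr("word " * 500)    # 500 tokens: below the STTR threshold
long_ = compute_ttr("word " * 2500)   # 2500 tokens: two full 1000-token chunks

# Gate on the availability flag, not on sttr == 0.0, since 0.0 doubles
# as the sentinel for "not computed".
for result in (short, long_):
    if result.metadata["sttr_available"]:
        print("STTR:", result.sttr)
    else:
        print("STTR not computed (text below 2000 words)")
```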
+
+
+# ---------------------------------------------------------------------------
+# Aggregation
+# ---------------------------------------------------------------------------
+
+
+class TTRAggregator:
+    """Aggregate per-text TTR results into group-level statistics.
+
+    Useful for comparing vocabulary richness across authors, genres, or
+    time periods by computing summary statistics (mean, std, min, max,
+    median) over a collection of ``TTRResult`` objects.
+
+    Related GitHub Issue:
+        #43 - Inline stylometry-ttr into pystylometry
+        https://github.com/craigtrim/pystylometry/issues/43
+
+    Example:
+        >>> from pystylometry.lexical import compute_ttr, TTRAggregator
+        >>> results = [compute_ttr(t) for t in texts]
+        >>> agg = TTRAggregator()
+        >>> stats = agg.aggregate(results, group_id="Shakespeare")
+        >>> print(stats.ttr_mean)
+        0.412
+    """
+
+    def aggregate(self, results: list[TTRResult], group_id: str) -> TTRAggregateResult:
+        """Compute aggregate statistics from multiple TTR results.
+
+        Args:
+            results: List of per-text ``TTRResult`` objects.
+            group_id: Identifier for the group (e.g. author name).
+
+        Returns:
+            ``TTRAggregateResult`` with group-level statistics.
+
+        Raises:
+            ValueError: If *results* is empty.
+        """
+        if not results:
+            raise ValueError("Cannot aggregate empty results list")
+
+        ttrs = [r.ttr for r in results]
+        root_ttrs = [r.root_ttr for r in results]
+        log_ttrs = [r.log_ttr for r in results]
+        sttrs = [r.sttr for r in results if r.metadata.get("sttr_available")]
+        delta_stds = [r.delta_std for r in results if r.metadata.get("delta_std_available")]
+
+        return TTRAggregateResult(
+            group_id=group_id,
+            text_count=len(results),
+            total_words=sum(r.total_words for r in results),
+            ttr_mean=round(statistics.mean(ttrs), 6),
+            ttr_std=round(statistics.stdev(ttrs), 6) if len(ttrs) > 1 else 0.0,
+            ttr_min=round(min(ttrs), 6),
+            ttr_max=round(max(ttrs), 6),
+            ttr_median=round(statistics.median(ttrs), 6),
+            root_ttr_mean=round(statistics.mean(root_ttrs), 4),
+            root_ttr_std=round(statistics.stdev(root_ttrs), 4) if len(root_ttrs) > 1 else 0.0,
+            log_ttr_mean=round(statistics.mean(log_ttrs), 6),
+            log_ttr_std=round(statistics.stdev(log_ttrs), 6) if len(log_ttrs) > 1 else 0.0,
+            sttr_mean=round(statistics.mean(sttrs), 6) if sttrs else None,
+            sttr_std=round(statistics.stdev(sttrs), 6) if len(sttrs) > 1 else None,
+            delta_std_mean=round(statistics.mean(delta_stds), 6) if delta_stds else None,
+            metadata={"group_id": group_id, "text_count": len(results)},
+        )
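
A short end-to-end sketch of the aggregator (hypothetical texts); note how `sttr_mean` degrades to `None` when no text in the group clears the availability thresholds, mirroring the `metadata` filtering above:

```python
from pystylometry.lexical import TTRAggregator, compute_ttr

# Hypothetical mini-corpus: both texts are far below the 2000-word STTR floor.
texts = ["A short sample text.", "Another brief sample here."]
results = [compute_ttr(t, text_id=f"doc-{i}") for i, t in enumerate(texts)]

stats = TTRAggregator().aggregate(results, group_id="demo")
print(stats.text_count)  # 2
print(stats.ttr_mean)    # mean of the per-text raw TTRs
print(stats.sttr_mean)   # None: no text had sttr_available=True
```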
pystylometry-1.3.5.dist-info/LICENSE
ADDED

@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 Craig Trim
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
pystylometry-1.3.5.dist-info/METADATA
ADDED

@@ -0,0 +1,78 @@
+Metadata-Version: 2.1
+Name: pystylometry
+Version: 1.3.5
+Summary: Comprehensive Python package for stylometric analysis
+License: MIT
+Keywords: stylometry,nlp,text-analysis,authorship,readability,lexical-diversity,readability-metrics
+Author: Craig Trim
+Author-email: craigtrim@gmail.com
+Requires-Python: >=3.9,<4.0
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Intended Audience :: Developers
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Scientific/Engineering :: Information Analysis
+Classifier: Topic :: Text Processing :: Linguistic
+Classifier: Typing :: Typed
+Project-URL: Homepage, https://github.com/craigtrim/pystylometry
+Project-URL: Issues, https://github.com/craigtrim/pystylometry/issues
+Project-URL: Repository, https://github.com/craigtrim/pystylometry
+Description-Content-Type: text/markdown
+
+# pystylometry
+
+[![PyPI version](https://badge.fury.io/py/pystylometry.svg)](https://badge.fury.io/py/pystylometry)
+[![Downloads](https://static.pepy.tech/badge/pystylometry)](https://pepy.tech/project/pystylometry)
+[![Downloads/Month](https://static.pepy.tech/badge/pystylometry/month)](https://pepy.tech/project/pystylometry)
+[![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+[]()
+
+Stylometric analysis and authorship attribution for Python. 50+ metrics across 11 modules, from vocabulary diversity to AI-generation detection.
+
+## Install
+
+```bash
+pip install pystylometry        # Core (lexical metrics)
+pip install pystylometry[all]   # Everything
+```
+
+## Modules
+
+| Module | Metrics | Description |
+|--------|---------|-------------|
+| [**lexical**](https://github.com/craigtrim/pystylometry/tree/master/pystylometry/lexical) | TTR, MTLD, Yule's K/I, Hapax, MATTR, VocD-D, HD-D, MSTTR, function words, word frequency | Vocabulary diversity and richness |
+| [**readability**](https://github.com/craigtrim/pystylometry/tree/master/pystylometry/readability) | Flesch, Flesch-Kincaid, SMOG, Gunning Fog, Coleman-Liau, ARI, Dale-Chall, Fry, FORCAST, Linsear Write, Powers-Sumner-Kearl | Grade-level and difficulty scoring |
+| [**syntactic**](https://github.com/craigtrim/pystylometry/tree/master/pystylometry/syntactic) | POS ratios, sentence types, parse tree depth, clausal density, passive voice, T-units, dependency distance | Sentence and parse structure (requires spaCy) |
+| [**authorship**](https://github.com/craigtrim/pystylometry/tree/master/pystylometry/authorship) | Burrows' Delta, Cosine Delta, Zeta, Kilgarriff chi-squared, MinMax, John's Delta, NCD | Author attribution and text comparison |
+| [**stylistic**](https://github.com/craigtrim/pystylometry/tree/master/pystylometry/stylistic) | Contractions, hedges, intensifiers, modals, punctuation, vocabulary overlap (Jaccard/Dice/Cosine/KL), cohesion, genre/register | Style markers and text similarity |
+| [**character**](https://github.com/craigtrim/pystylometry/tree/master/pystylometry/character) | Letter frequencies, digit/uppercase ratios, special characters, whitespace | Character-level fingerprinting |
+| [**ngrams**](https://github.com/craigtrim/pystylometry/tree/master/pystylometry/ngrams) | Word/character/POS n-grams, Shannon entropy, skipgrams | N-gram profiles and entropy |
+| [**dialect**](https://github.com/craigtrim/pystylometry/tree/master/pystylometry/dialect) | British/American classification, spelling/grammar/vocabulary markers, markedness | Regional dialect detection |
+| [**consistency**](https://github.com/craigtrim/pystylometry/tree/master/pystylometry/consistency) | Sliding-window chi-squared drift, pattern classification | Intra-document style analysis |
+| [**prosody**](https://github.com/craigtrim/pystylometry/tree/master/pystylometry/prosody) | Syllable stress, rhythm regularity | Prose rhythm (requires spaCy) |
+| [**viz**](https://github.com/craigtrim/pystylometry/tree/master/pystylometry/viz) | Timeline, scatter, report (PNG + interactive HTML) | Drift detection visualization |
+
+## Development
+
+```bash
+git clone https://github.com/craigtrim/pystylometry && cd pystylometry
+pip install -e ".[dev,all]"
+make test   # 1022 tests
+make lint   # ruff + mypy
+make all    # lint + test + build
+```
+
+## License
+
+MIT
+
+## Author
+
+Craig Trim -- craigtrim@gmail.com
+
{pystylometry-1.3.0.dist-info → pystylometry-1.3.5.dist-info}/RECORD
RENAMED

@@ -1,7 +1,7 @@
 pystylometry/README.md,sha256=WFOtCAF3qtDTgGG3a_jTjNSwVgpQEXI1PKqbVBfyo1M,2366
-pystylometry/__init__.py,sha256=
+pystylometry/__init__.py,sha256=bZ8xk66Mx7gj3K_I6594DoqATIGv1FtLFSJmF6Dz1g4,10462
 pystylometry/_normalize.py,sha256=7tdfgAKg5CI2d4eoDypmFqOVByoxpwgUUZD6vyBH86A,8679
-pystylometry/_types.py,sha256=
+pystylometry/_types.py,sha256=_YCkVyvHulmKkvmjzb73dcCOWJwiJZVhkV7sJcMr4YY,83618
 pystylometry/_utils.py,sha256=CXTx4KDJ_6iiHcc2OXqOYs-izhLf_ZEmJFKdHyd7q34,5282
 pystylometry/authorship/README.md,sha256=zNXCpLj7nczPnYykJnCUw3y-kxfC9mWZmngi3nfw6us,1016
 pystylometry/authorship/__init__.py,sha256=D7m38hWi_62o1ZDSrghLCfob9YsykTht4K37wiVgHfg,1530

@@ -13,7 +13,7 @@ pystylometry/authorship/zeta.py,sha256=oOi9Y6ZPq15ILLVl6So9O9ERvzig26en6_dpQJWeo
 pystylometry/character/README.md,sha256=poQwhbI8MabVD_626CWjEL87IOX5YDGS0ZJTH1hNwEE,607
 pystylometry/character/__init__.py,sha256=CiiKJmZ10UJE8qAecavpOKyw-vGonsOew_mFH34ZOC0,371
 pystylometry/character/character_metrics.py,sha256=OCIGP_ivtwtzcifcxcbmp2R5SIKh2tKyvKcHAv64S8g,14029
-pystylometry/cli.py,sha256=
+pystylometry/cli.py,sha256=HvzBZxFSiS5AAXCb6N9Eo3QonkH-ucRFp6xDF1kJTQ0,26380
 pystylometry/consistency/README.md,sha256=HG_Rd6WRBnIz3M7J11dVDv1S2ARkMABFYrTn-VV8xRY,1058
 pystylometry/consistency/__init__.py,sha256=l7nzpS7M4yHDBbM2LGAtW0XGT2n7YjSey_1xKf45224,2181
 pystylometry/consistency/_thresholds.py,sha256=5fZwdJ_cnDy0ED7CCYs6V_zP6kIAR1p0h0NYkbZ0HRg,6381

@@ -24,12 +24,13 @@ pystylometry/dialect/_data/dialect_markers.json,sha256=DthluOA6q0rG_8IrCrFIYWh_E
 pystylometry/dialect/_loader.py,sha256=M2ATp-5754v_yX9EWvBP0r5qgNf8xlL8XadVsVb_Hco,12989
 pystylometry/dialect/detector.py,sha256=9x0ZuIfTIjsmdNSx0Ezy5AC0SAFtC4kVw11iOSBd9gQ,20147
 pystylometry/lexical/README.md,sha256=cFQ7KRZV4ubsQwIlOH3YHTbhhNl5X91Sr3zcn-3x0HI,1185
-pystylometry/lexical/__init__.py,sha256=
+pystylometry/lexical/__init__.py,sha256=ib_F-NGVydLNGT_HgaWurBT25AadTE4eNcAN1lGMKmQ,934
 pystylometry/lexical/advanced_diversity.py,sha256=rL1hlNqTnaEFcA2v4oBJlojHZMTqdvvm4jYXTFGVpYE,25664
 pystylometry/lexical/function_words.py,sha256=eel9bq_qWgWlvG0NtDiouilMt9kaFqz2rh3add2UC4U,17832
 pystylometry/lexical/hapax.py,sha256=djTqZyZIYXa3GRiPoy6TTGHPm0wCRNJ9U0Rwnf5NoDk,12173
 pystylometry/lexical/mtld.py,sha256=XpeCF8sOXZhWbaazHGuqm08mrOf_DYfkfGGAltWnyy4,7101
-pystylometry/lexical/
+pystylometry/lexical/repetition.py,sha256=A9L0oNwfnCepVkWy57kjHV47Pw4M6fZXEl25hBVdq2s,18318
+pystylometry/lexical/ttr.py,sha256=igS8gnvIv57zvjQPtmIgkB5Wj7jdaKSMRpJ1WvMfKtw,13091
 pystylometry/lexical/word_frequency_sophistication.py,sha256=OHOS0fBvd1Bz8zsJk-pJbWLTgImmBd-aewQnp_kq8BY,38828
 pystylometry/lexical/yule.py,sha256=NXggha8jmQCu4i-qKZpISwyJBqNpuPHyVR86BLDLgio,5192
 pystylometry/ngrams/README.md,sha256=50wyaWcLGbosLzTPR1cXdE_xAVU8jVY7fd3ReEk9KnY,802

@@ -70,7 +71,8 @@ pystylometry/viz/jsx/_base.py,sha256=nd7kEc13fUcRMom3A5jqjGyTy-djIeydq2k3oPHZIHY
 pystylometry/viz/jsx/report.py,sha256=DbbHnnNAEi5tmVg4PmiHb17vkBBXujyE4x1CfVBiOBw,25857
 pystylometry/viz/jsx/timeline.py,sha256=hor-xnBa6oVkSqN0AEZUCQFBOB-iTfHSFZHiEfeakPA,30716
 pystylometry/viz/jsx/viewer.py,sha256=3LO49d_2bRf_P-P-2oSKpKx4N8Ugo4oCLb3DtvyNxXI,43716
-pystylometry-1.3.
-pystylometry-1.3.
-pystylometry-1.3.
-pystylometry-1.3.
+pystylometry-1.3.5.dist-info/LICENSE,sha256=vou5JCLAT5nHcsUv-AkjUYAihYfN9mwPDXxV2DHyHBo,1067
+pystylometry-1.3.5.dist-info/METADATA,sha256=DEFzNf_ZQd3mulwOnFMRSyc1s30alGM6UtO-L7nloPc,4779
+pystylometry-1.3.5.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+pystylometry-1.3.5.dist-info/entry_points.txt,sha256=XsJvKgKs3LRDuzdF45JO7ZnS0wGKg1f5qsxVYSZzLp8,165
+pystylometry-1.3.5.dist-info/RECORD,,