pystylometry 1.1.0__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- a/pystylometry/stylistic/vocabulary_overlap.py
+++ b/pystylometry/stylistic/vocabulary_overlap.py
@@ -10,15 +10,275 @@ Related GitHub Issue:
 
  References:
      Jaccard, P. (1912). The distribution of the flora in the alpine zone.
-     Salton, G., & McGill, M. J. (1983). Introduction to Modern Information Retrieval.
+         New Phytologist, 11(2), 37-50.
+     Sørensen, T. (1948). A method of establishing groups of equal amplitude in
+         plant sociology based on similarity of species. Kongelige Danske
+         Videnskabernes Selskab, 5(4), 1-34.
+     Salton, G., & McGill, M. J. (1983). Introduction to Modern Information
+         Retrieval. McGraw-Hill.
+     Kullback, S., & Leibler, R. A. (1951). On Information and Sufficiency.
+         Annals of Mathematical Statistics, 22(1), 79-86.
+     Manning, C. D., & Schütze, H. (1999). Foundations of Statistical Natural
+         Language Processing. MIT Press.
  """
 
+ from __future__ import annotations
+
+ import math
+ import re
+ from collections import Counter
+
  from .._types import VocabularyOverlapResult
 
 
- def compute_vocabulary_overlap(text1: str, text2: str) -> VocabularyOverlapResult:
+ def _tokenize(text: str) -> list[str]:
+     """Tokenize text into lowercase words.
+
+     Uses a simple regex-based tokenizer that extracts runs of ASCII letters.
+     Converts to lowercase for case-insensitive comparison.
+
+     Args:
+         text: Input text to tokenize
+
+     Returns:
+         List of lowercase word tokens
+     """
+     # Extract runs of ASCII letters from the lowercased text
+     tokens = re.findall(r"\b[a-zA-Z]+\b", text.lower())
+     return tokens
+
+
+ def _compute_jaccard(set1: set[str], set2: set[str]) -> float:
+     """Compute Jaccard similarity coefficient.
+
+     The Jaccard index measures similarity as the size of the intersection
+     divided by the size of the union of two sets.
+
+         J(A, B) = |A ∩ B| / |A ∪ B|
+
+     Args:
+         set1: First vocabulary set
+         set2: Second vocabulary set
+
+     Returns:
+         Jaccard similarity coefficient (0.0 to 1.0)
+
+     References:
+         Jaccard, P. (1912). The distribution of the flora in the alpine zone.
+     """
+     if not set1 and not set2:
+         return 1.0  # Both empty = identical
+
+     intersection = len(set1 & set2)
+     union = len(set1 | set2)
+
+     return intersection / union if union > 0 else 0.0
+
+
+ def _compute_dice(set1: set[str], set2: set[str]) -> float:
+     """Compute Sørensen-Dice coefficient.
+
+     The Dice coefficient is similar to Jaccard but weights the intersection
+     more heavily. Also known as the Sørensen-Dice index.
+
+         D(A, B) = 2|A ∩ B| / (|A| + |B|)
+
+     Args:
+         set1: First vocabulary set
+         set2: Second vocabulary set
+
+     Returns:
+         Dice coefficient (0.0 to 1.0)
+
+     References:
+         Sørensen, T. (1948). A method of establishing groups of equal amplitude
+         in plant sociology based on similarity of species.
      """
-     Compute vocabulary overlap and similarity between two texts.
+     if not set1 and not set2:
+         return 1.0  # Both empty = identical
+
+     intersection = len(set1 & set2)
+     total_size = len(set1) + len(set2)
+
+     return (2 * intersection) / total_size if total_size > 0 else 0.0
+
+
+ def _compute_overlap_coefficient(set1: set[str], set2: set[str]) -> float:
+     """Compute overlap coefficient.
+
+     The overlap coefficient measures the overlap relative to the smaller set.
+     Useful when comparing texts of very different lengths.
+
+         O(A, B) = |A ∩ B| / min(|A|, |B|)
+
+     Args:
+         set1: First vocabulary set
+         set2: Second vocabulary set
+
+     Returns:
+         Overlap coefficient (0.0 to 1.0)
+     """
+     if not set1 or not set2:
+         return 0.0 if set1 or set2 else 1.0
+
+     intersection = len(set1 & set2)
+     min_size = min(len(set1), len(set2))
+
+     return intersection / min_size if min_size > 0 else 0.0
+
+
+ def _compute_cosine_similarity(freq1: Counter[str], freq2: Counter[str], vocab: set[str]) -> float:
+     """Compute cosine similarity between term frequency vectors.
+
+     Treats each text as a vector in vocabulary space where each dimension
+     is the frequency of a word. Computes the cosine of the angle between vectors.
+
+         cos(θ) = (A · B) / (||A|| × ||B||)
+
+     Args:
+         freq1: Word frequencies for text 1
+         freq2: Word frequencies for text 2
+         vocab: Combined vocabulary (union of both texts)
+
+     Returns:
+         Cosine similarity (-1.0 to 1.0, though word frequencies yield 0.0 to 1.0)
+
+     References:
+         Salton, G., & McGill, M. J. (1983). Introduction to Modern Information
+         Retrieval.
+     """
+     if not vocab:
+         return 1.0  # Both empty = identical
+
+     # Compute dot product and magnitudes
+     dot_product = 0.0
+     magnitude1 = 0.0
+     magnitude2 = 0.0
+
+     for word in vocab:
+         f1 = freq1.get(word, 0)
+         f2 = freq2.get(word, 0)
+         dot_product += f1 * f2
+         magnitude1 += f1 * f1
+         magnitude2 += f2 * f2
+
+     magnitude1 = math.sqrt(magnitude1)
+     magnitude2 = math.sqrt(magnitude2)
+
+     if magnitude1 == 0 or magnitude2 == 0:
+         return 0.0
+
+     return dot_product / (magnitude1 * magnitude2)
+
+
+ def _compute_kl_divergence(
+     freq1: Counter[str], freq2: Counter[str], vocab: set[str], smoothing: float = 1e-10
+ ) -> float:
+     """Compute Kullback-Leibler divergence from text1 to text2.
+
+     KL divergence measures how one probability distribution diverges from
+     another. It is asymmetric: D_KL(P || Q) ≠ D_KL(Q || P).
+
+         D_KL(P || Q) = Σ P(x) log(P(x) / Q(x))
+
+     A small smoothing value is added to both probabilities to avoid a zero
+     ratio and division by zero when Q(x) = 0.
+
+     Args:
+         freq1: Word frequencies for text 1 (P distribution)
+         freq2: Word frequencies for text 2 (Q distribution)
+         vocab: Combined vocabulary (union of both texts)
+         smoothing: Small value added to probabilities to avoid log(0)
+
+     Returns:
+         KL divergence (non-negative, unbounded above)
+
+     Note:
+         Returns 0.0 for identical distributions. Higher values indicate
+         greater difference between distributions.
+
+     References:
+         Kullback, S., & Leibler, R. A. (1951). On Information and Sufficiency.
+     """
+     if not vocab:
+         return 0.0  # Both empty = identical
+
+     # Convert frequencies to probabilities
+     total1 = sum(freq1.values())
+     total2 = sum(freq2.values())
+
+     if total1 == 0 or total2 == 0:
+         return 0.0
+
+     kl_div = 0.0
+     for word in vocab:
+         p = (freq1.get(word, 0) / total1) + smoothing
+         q = (freq2.get(word, 0) / total2) + smoothing
+         kl_div += p * math.log(p / q)
+
+     return max(0.0, kl_div)  # Ensure non-negative despite smoothing artifacts
+
+
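The asymmetry noted in the docstring is easy to verify numerically. A standalone sketch (not package code) that mirrors the smoothed formula above:

```python
import math

def kl(p_counts: dict, q_counts: dict, eps: float = 1e-10) -> float:
    # Smoothed KL divergence over the union vocabulary, as in the diff above
    vocab = set(p_counts) | set(q_counts)
    tp, tq = sum(p_counts.values()), sum(q_counts.values())
    total = 0.0
    for w in vocab:
        p = p_counts.get(w, 0) / tp + eps
        q = q_counts.get(w, 0) / tq + eps
        total += p * math.log(p / q)
    return max(0.0, total)

a = {"the": 8, "cat": 2}   # P: skewed toward "the"
b = {"the": 5, "cat": 5}   # Q: balanced
print(round(kl(a, b), 3))  # 0.193 -- D_KL(P || Q)
print(round(kl(b, a), 3))  # 0.223 -- D_KL(Q || P): a different value
```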
+ def _compute_tfidf_distinctive_words(
+     freq1: Counter[str],
+     freq2: Counter[str],
+     unique_to_1: set[str],
+     unique_to_2: set[str],
+     top_n: int = 20,
+ ) -> tuple[list[tuple[str, float]], list[tuple[str, float]]]:
+     """Compute distinctive words for each text using TF-IDF-like scoring.
+
+     Words unique to each text are scored by their raw frequency, providing
+     a measure of how "distinctive" they are for that text.
+
+     Only unique words are scored; in a two-document collection, a word that
+     appears in just one document carries maximal inverse document frequency,
+     which is what makes this frequency-based scoring TF-IDF-like.
+
+     Args:
+         freq1: Word frequencies for text 1
+         freq2: Word frequencies for text 2
+         unique_to_1: Words appearing only in text 1
+         unique_to_2: Words appearing only in text 2
+         top_n: Number of top distinctive words to return
+
+     Returns:
+         Tuple of (text1_distinctive, text2_distinctive) lists,
+         each containing (word, score) tuples sorted by score descending
+     """
+     # For unique words, score by frequency
+     text1_scores: list[tuple[str, float]] = []
+     for word in unique_to_1:
+         score = float(freq1[word])
+         text1_scores.append((word, score))
+
+     text2_scores: list[tuple[str, float]] = []
+     for word in unique_to_2:
+         score = float(freq2[word])
+         text2_scores.append((word, score))
+
+     # Sort by score descending
+     text1_scores.sort(key=lambda x: x[1], reverse=True)
+     text2_scores.sort(key=lambda x: x[1], reverse=True)
+
+     return text1_scores[:top_n], text2_scores[:top_n]
+
+
+ def compute_vocabulary_overlap(
+     text1: str,
+     text2: str,
+     top_distinctive: int = 20,
+ ) -> VocabularyOverlapResult:
+     """Compute vocabulary overlap and similarity between two texts.
+
+     This function computes multiple similarity metrics based on vocabulary
+     comparison, useful for authorship verification, plagiarism detection,
+     and measuring stylistic consistency across texts.
+
+     Metrics computed:
+     - Jaccard similarity: intersection / union (set-based)
+     - Sørensen-Dice coefficient: 2 * intersection / (size1 + size2)
+     - Overlap coefficient: intersection / min(size1, size2)
+     - Cosine similarity: normalized dot product of frequency vectors
+     - KL divergence: distributional difference (asymmetric)
 
      Related GitHub Issue:
          #21 - Vocabulary Overlap and Similarity Metrics
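The set-based formulas listed above are easy to check by hand. A standalone sketch with toy vocabularies (not package code):

```python
A = {"the", "quick", "brown", "fox"}
B = {"the", "brown", "dog"}

inter, union = A & B, A | B  # {"the", "brown"} and 5 distinct words

jaccard = len(inter) / len(union)           # 2 / 5 = 0.4
dice = 2 * len(inter) / (len(A) + len(B))   # 4 / 7 ≈ 0.571
overlap = len(inter) / min(len(A), len(B))  # 2 / 3 ≈ 0.667

# Dice never falls below Jaccard; the overlap coefficient is highest
# here because it normalizes by the smaller set, B.
print(jaccard, dice, overlap)
```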
@@ -27,21 +287,102 @@ def compute_vocabulary_overlap(text1: str, text2: str) -> VocabularyOverlapResul
      Args:
          text1: First text to compare
          text2: Second text to compare
+         top_distinctive: Number of most distinctive words to return per text
 
      Returns:
-         VocabularyOverlapResult with Jaccard, Dice, cosine similarities,
-         shared vocabulary statistics, and distinctive words for each text.
+         VocabularyOverlapResult with similarity scores, vocabulary statistics,
+         shared vocabulary, and distinctive words for each text.
 
      Example:
-         >>> result = compute_vocabulary_overlap(text1, text2)
+         >>> result = compute_vocabulary_overlap(
+         ...     "The quick brown fox jumps over the lazy dog",
+         ...     "The fast brown fox leaps over the sleepy dog"
+         ... )
          >>> print(f"Jaccard similarity: {result.jaccard_similarity:.3f}")
-         Jaccard similarity: 0.456
+         Jaccard similarity: 0.455
          >>> print(f"Shared words: {result.shared_vocab_size}")
-         Shared words: 234
+         Shared words: 5
+         >>> print(f"Text1 distinctive: {result.text1_distinctive_words}")
+         Text1 distinctive: [('quick', 1.0), ('jumps', 1.0), ('lazy', 1.0)]
+
+     References:
+         Jaccard, P. (1912). The distribution of the flora in the alpine zone.
+             New Phytologist, 11(2), 37-50.
+         Sørensen, T. (1948). A method of establishing groups of equal amplitude
+             in plant sociology based on similarity of species.
+         Salton, G., & McGill, M. J. (1983). Introduction to Modern Information
+             Retrieval. McGraw-Hill.
+         Kullback, S., & Leibler, R. A. (1951). On Information and Sufficiency.
+             Annals of Mathematical Statistics, 22(1), 79-86.
+         Manning, C. D., & Schütze, H. (1999). Foundations of Statistical Natural
+             Language Processing. MIT Press.
      """
-     # TODO: Implement vocabulary overlap analysis
-     # GitHub Issue #21: https://github.com/craigtrim/pystylometry/issues/21
-     raise NotImplementedError(
-         "Vocabulary overlap not yet implemented. "
-         "See GitHub Issue #21: https://github.com/craigtrim/pystylometry/issues/21"
+     # Tokenize texts
+     tokens1 = _tokenize(text1)
+     tokens2 = _tokenize(text2)
+
+     # Build frequency counters and vocabulary sets
+     freq1: Counter[str] = Counter(tokens1)
+     freq2: Counter[str] = Counter(tokens2)
+
+     vocab1 = set(freq1.keys())
+     vocab2 = set(freq2.keys())
+
+     # Compute set operations
+     shared = vocab1 & vocab2
+     union = vocab1 | vocab2
+     unique_to_1 = vocab1 - vocab2
+     unique_to_2 = vocab2 - vocab1
+
+     # Compute similarity metrics
+     jaccard = _compute_jaccard(vocab1, vocab2)
+     dice = _compute_dice(vocab1, vocab2)
+     overlap = _compute_overlap_coefficient(vocab1, vocab2)
+     cosine = _compute_cosine_similarity(freq1, freq2, union)
+     kl_div = _compute_kl_divergence(freq1, freq2, union)
+
+     # Compute coverage ratios
+     text1_coverage = len(shared) / len(vocab1) if vocab1 else 0.0
+     text2_coverage = len(shared) / len(vocab2) if vocab2 else 0.0
+
+     # Get distinctive words
+     text1_distinctive, text2_distinctive = _compute_tfidf_distinctive_words(
+         freq1, freq2, unique_to_1, unique_to_2, top_distinctive
+     )
+
+     # Build shared words list (sorted by combined frequency)
+     shared_with_freq = [(word, freq1[word] + freq2[word]) for word in shared]
+     shared_with_freq.sort(key=lambda x: x[1], reverse=True)
+     shared_words = [word for word, _ in shared_with_freq]
+
+     return VocabularyOverlapResult(
+         # Similarity scores
+         jaccard_similarity=jaccard,
+         dice_coefficient=dice,
+         overlap_coefficient=overlap,
+         cosine_similarity=cosine,
+         kl_divergence=kl_div,
+         # Vocabulary sizes
+         text1_vocab_size=len(vocab1),
+         text2_vocab_size=len(vocab2),
+         shared_vocab_size=len(shared),
+         union_vocab_size=len(union),
+         text1_unique_count=len(unique_to_1),
+         text2_unique_count=len(unique_to_2),
+         # Shared and distinctive vocabulary
+         shared_words=shared_words,
+         text1_distinctive_words=text1_distinctive,
+         text2_distinctive_words=text2_distinctive,
+         # Coverage ratios
+         text1_coverage=text1_coverage,
+         text2_coverage=text2_coverage,
+         # Metadata
+         metadata={
+             "text1_token_count": len(tokens1),
+             "text2_token_count": len(tokens2),
+             "text1_frequencies": dict(freq1),
+             "text2_frequencies": dict(freq2),
+             "unique_to_text1": sorted(unique_to_1),
+             "unique_to_text2": sorted(unique_to_2),
+         },
      )
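Putting the new module together, a usage sketch with values worked out from the code above. The deep import path follows the file's location in RECORD; whether the function is re-exported from `pystylometry.stylistic` is not confirmed by this diff:

```python
from pystylometry.stylistic.vocabulary_overlap import compute_vocabulary_overlap

result = compute_vocabulary_overlap(
    "The quick brown fox jumps over the lazy dog",
    "The fast brown fox leaps over the sleepy dog",
)

# Two 8-word vocabularies share {"the", "brown", "fox", "over", "dog"}:
print(f"{result.jaccard_similarity:.3f}")  # 5 / 11 = 0.455
print(result.shared_vocab_size)            # 5
print(result.text1_distinctive_words)      # 'quick', 'jumps', 'lazy', each scored
                                           # 1.0; order may vary with set iteration
```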
--- /dev/null
+++ b/pystylometry/syntactic/README.md
@@ -0,0 +1,20 @@
+ # syntactic
+
+ ![4 public functions](https://img.shields.io/badge/functions-4-blue)
+ ![Requires spaCy](https://img.shields.io/badge/requires-spaCy-orange)
+
+ Sentence structure, part-of-speech, and parse tree analysis.
+
+ ## Catalogue
+
+ | File | Function | What It Measures |
+ |------|----------|-----------------|
+ | `pos_ratios.py` | `compute_pos_ratios` | Noun/verb/adjective/adverb ratios |
+ | `sentence_stats.py` | `compute_sentence_stats` | Sentence length, word length distributions |
+ | `sentence_types.py` | `compute_sentence_types` | Declarative, interrogative, imperative, exclamatory classification |
+ | `advanced_syntactic.py` | `compute_advanced_syntactic` | Parse tree depth, clausal density, passive voice, T-units, dependency distance, subordination/coordination ratios |
+
+ ## See Also
+
+ - [`stylistic/`](../stylistic/) for higher-level style features built on syntactic foundations
+ - [`ngrams/`](../ngrams/) for POS n-gram sequences via `compute_extended_ngrams(text, pos=True)`
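A hypothetical usage sketch to tie the catalogue together. The function names come from the table above; the import path and call signatures are assumptions, not confirmed by this diff:

```python
# Hypothetical sketch -- signatures and result fields are assumed.
from pystylometry.syntactic import compute_pos_ratios, compute_sentence_types

text = "Close the window. Why is it so cold in here?"
pos = compute_pos_ratios(text)        # noun/verb/adjective/adverb ratios
kinds = compute_sentence_types(text)  # declarative vs. interrogative, etc.
```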
--- /dev/null
+++ b/pystylometry/viz/README.md
@@ -0,0 +1,27 @@
+ # viz
+
+ ![6 public functions](https://img.shields.io/badge/functions-6-blue)
+ ![Optional: matplotlib](https://img.shields.io/badge/optional-matplotlib-yellow)
+
+ Visualization for drift detection results. Two output modes: static PNG (matplotlib) and interactive HTML (React JSX).
+
+ ## Catalogue
+
+ | File | Functions | Output |
+ |------|-----------|--------|
+ | `drift.py` | `plot_drift_timeline`, `plot_drift_scatter`, `plot_drift_report` | PNG via matplotlib/seaborn |
+ | `jsx/report.py` | `export_drift_report_jsx` | Interactive HTML dashboard |
+ | `jsx/timeline.py` | `export_drift_timeline_jsx` | Interactive HTML timeline |
+ | `jsx/viewer.py` | `export_drift_viewer` | Standalone HTML viewer with file upload |
+ | `jsx/_base.py` | _(internal)_ | React/JSX rendering base |
+
+ ## Install
+
+ ```bash
+ pip install pystylometry[viz]  # For PNG output (matplotlib + seaborn)
+ # JSX/HTML output requires no additional dependencies
+ ```
+
+ ## See Also
+
+ - [`consistency/`](../consistency/) produces the `KilgarriffDriftResult` consumed by all viz functions
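A hypothetical end-to-end sketch. Only the function names, the `[viz]` extra, and the `KilgarriffDriftResult` type appear in this diff; import paths and signatures are assumptions:

```python
# Hypothetical sketch -- import paths and signatures are assumed.
from pystylometry.viz import plot_drift_timeline
from pystylometry.viz.jsx import export_drift_report_jsx

drift = ...  # a KilgarriffDriftResult produced by the consistency module

plot_drift_timeline(drift)                  # static PNG; needs pystylometry[viz]
export_drift_report_jsx(drift, "out.html")  # interactive HTML; no extra deps
```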
--- /dev/null
+++ b/pystylometry-1.3.1.dist-info/LICENSE
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 Craig Trim
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
--- /dev/null
+++ b/pystylometry-1.3.1.dist-info/METADATA
@@ -0,0 +1,79 @@
+ Metadata-Version: 2.1
+ Name: pystylometry
+ Version: 1.3.1
+ Summary: Comprehensive Python package for stylometric analysis
+ License: MIT
+ Keywords: stylometry,nlp,text-analysis,authorship,readability,lexical-diversity,readability-metrics
+ Author: Craig Trim
+ Author-email: craigtrim@gmail.com
+ Requires-Python: >=3.9,<4.0
+ Classifier: Development Status :: 4 - Beta
+ Classifier: Intended Audience :: Developers
+ Classifier: Intended Audience :: Science/Research
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
+ Classifier: Topic :: Text Processing :: Linguistic
+ Classifier: Typing :: Typed
+ Requires-Dist: stylometry-ttr (>=1.0.3,<2.0.0)
+ Project-URL: Homepage, https://github.com/craigtrim/pystylometry
+ Project-URL: Issues, https://github.com/craigtrim/pystylometry/issues
+ Project-URL: Repository, https://github.com/craigtrim/pystylometry
+ Description-Content-Type: text/markdown
+
+ # pystylometry
+
+ [![PyPI version](https://badge.fury.io/py/pystylometry.svg)](https://badge.fury.io/py/pystylometry)
+ [![Downloads](https://static.pepy.tech/badge/pystylometry)](https://pepy.tech/project/pystylometry)
+ [![Downloads/Month](https://static.pepy.tech/badge/pystylometry/month)](https://pepy.tech/project/pystylometry)
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9%2B-blue.svg)](https://www.python.org/downloads/)
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)
+ [![Tests](https://img.shields.io/badge/tests-1022%20passed-brightgreen)]()
+
+ Stylometric analysis and authorship attribution for Python. 50+ metrics across 11 modules, from vocabulary diversity to AI-generation detection.
+
+ ## Install
+
+ ```bash
+ pip install pystylometry       # Core (lexical metrics)
+ pip install pystylometry[all]  # Everything
+ ```
+
+ ## Modules
+
+ | Module | Metrics | Description |
+ |--------|---------|-------------|
+ | [**lexical**](https://github.com/craigtrim/pystylometry/tree/master/pystylometry/lexical) | TTR, MTLD, Yule's K/I, Hapax, MATTR, VocD-D, HD-D, MSTTR, function words, word frequency | Vocabulary diversity and richness |
+ | [**readability**](https://github.com/craigtrim/pystylometry/tree/master/pystylometry/readability) | Flesch, Flesch-Kincaid, SMOG, Gunning Fog, Coleman-Liau, ARI, Dale-Chall, Fry, FORCAST, Linsear Write, Powers-Sumner-Kearl | Grade-level and difficulty scoring |
+ | [**syntactic**](https://github.com/craigtrim/pystylometry/tree/master/pystylometry/syntactic) | POS ratios, sentence types, parse tree depth, clausal density, passive voice, T-units, dependency distance | Sentence and parse structure (requires spaCy) |
+ | [**authorship**](https://github.com/craigtrim/pystylometry/tree/master/pystylometry/authorship) | Burrows' Delta, Cosine Delta, Zeta, Kilgarriff chi-squared, MinMax, John's Delta, NCD | Author attribution and text comparison |
+ | [**stylistic**](https://github.com/craigtrim/pystylometry/tree/master/pystylometry/stylistic) | Contractions, hedges, intensifiers, modals, punctuation, vocabulary overlap (Jaccard/Dice/Cosine/KL), cohesion, genre/register | Style markers and text similarity |
+ | [**character**](https://github.com/craigtrim/pystylometry/tree/master/pystylometry/character) | Letter frequencies, digit/uppercase ratios, special characters, whitespace | Character-level fingerprinting |
+ | [**ngrams**](https://github.com/craigtrim/pystylometry/tree/master/pystylometry/ngrams) | Word/character/POS n-grams, Shannon entropy, skipgrams | N-gram profiles and entropy |
+ | [**dialect**](https://github.com/craigtrim/pystylometry/tree/master/pystylometry/dialect) | British/American classification, spelling/grammar/vocabulary markers, markedness | Regional dialect detection |
+ | [**consistency**](https://github.com/craigtrim/pystylometry/tree/master/pystylometry/consistency) | Sliding-window chi-squared drift, pattern classification | Intra-document style analysis |
+ | [**prosody**](https://github.com/craigtrim/pystylometry/tree/master/pystylometry/prosody) | Syllable stress, rhythm regularity | Prose rhythm (requires spaCy) |
+ | [**viz**](https://github.com/craigtrim/pystylometry/tree/master/pystylometry/viz) | Timeline, scatter, report (PNG + interactive HTML) | Drift detection visualization |
+
+ ## Development
+
+ ```bash
+ git clone https://github.com/craigtrim/pystylometry && cd pystylometry
+ pip install -e ".[dev,all]"
+ make test  # 1022 tests
+ make lint  # ruff + mypy
+ make all   # lint + test + build
+ ```
+
+ ## License
+
+ MIT
+
+ ## Author
+
+ Craig Trim -- craigtrim@gmail.com
+
--- a/pystylometry-1.1.0.dist-info/RECORD
+++ b/pystylometry-1.3.1.dist-info/RECORD
@@ -1,35 +1,46 @@
- pystylometry/__init__.py,sha256=UQGe2EJUdMh1rE1zmIGNqrMgbipAhPDDU5Cvp_w--64,8594
+ pystylometry/README.md,sha256=WFOtCAF3qtDTgGG3a_jTjNSwVgpQEXI1PKqbVBfyo1M,2366
+ pystylometry/__init__.py,sha256=Z6zkHlX05SUeObDca9dL1Gkfq4UPBWbU2M4sp4fVj78,9220
  pystylometry/_normalize.py,sha256=7tdfgAKg5CI2d4eoDypmFqOVByoxpwgUUZD6vyBH86A,8679
- pystylometry/_types.py,sha256=OOKJ0Y_2OtaiQn_Y0EVHTOkPrNlWDhdu5Jl-4quuNZw,74257
+ pystylometry/_types.py,sha256=g6XzwCHeMAIBfexId6Pd9EQfJzvZ0KYMfD4kpS5T7BQ,82284
  pystylometry/_utils.py,sha256=CXTx4KDJ_6iiHcc2OXqOYs-izhLf_ZEmJFKdHyd7q34,5282
- pystylometry/authorship/__init__.py,sha256=QJMZ9xx5mf3u9X-HAdZsgqzZGhi4G6Pgj7wjNYGholE,1343
- pystylometry/authorship/additional_methods.py,sha256=ZCYwM_OEq3fCZGkCL0wsOUBiMSvrF1paVON4xueJDW4,2583
+ pystylometry/authorship/README.md,sha256=zNXCpLj7nczPnYykJnCUw3y-kxfC9mWZmngi3nfw6us,1016
+ pystylometry/authorship/__init__.py,sha256=D7m38hWi_62o1ZDSrghLCfob9YsykTht4K37wiVgHfg,1530
+ pystylometry/authorship/additional_methods.py,sha256=jvEg6TMI55jhkDt1jpC-08iXTzz6TaNmKOkJy5qNF0c,11487
  pystylometry/authorship/burrows_delta.py,sha256=6XC8I7EcBTLbn9BNKZsOtL0otL4vKFX10aHBlU4Bki4,5677
- pystylometry/authorship/kilgarriff.py,sha256=Hqv5Ww7s_Tn4KSpcDAE_dTVESv4X3pglkQt0bYjQGW0,13097
+ pystylometry/authorship/compression.py,sha256=qqUHDd7wWOB6Q2E97-cczBEWhKDTF3ynJUhbRqGq_RA,6296
+ pystylometry/authorship/kilgarriff.py,sha256=oz4JbLnFEuPXZYLmhfkuapg516A554FvXvVNIVu7uKk,13379
  pystylometry/authorship/zeta.py,sha256=oOi9Y6ZPq15ILLVl6So9O9ERvzig26en6_dpQJWeoOc,4338
+ pystylometry/character/README.md,sha256=poQwhbI8MabVD_626CWjEL87IOX5YDGS0ZJTH1hNwEE,607
  pystylometry/character/__init__.py,sha256=CiiKJmZ10UJE8qAecavpOKyw-vGonsOew_mFH34ZOC0,371
  pystylometry/character/character_metrics.py,sha256=OCIGP_ivtwtzcifcxcbmp2R5SIKh2tKyvKcHAv64S8g,14029
  pystylometry/cli.py,sha256=z0yx2O_E05tHT9_BHgSaQ2zq5_fBERXfhbYHcuQ2y-A,15477
+ pystylometry/consistency/README.md,sha256=HG_Rd6WRBnIz3M7J11dVDv1S2ARkMABFYrTn-VV8xRY,1058
  pystylometry/consistency/__init__.py,sha256=l7nzpS7M4yHDBbM2LGAtW0XGT2n7YjSey_1xKf45224,2181
  pystylometry/consistency/_thresholds.py,sha256=5fZwdJ_cnDy0ED7CCYs6V_zP6kIAR1p0h0NYkbZ0HRg,6381
  pystylometry/consistency/drift.py,sha256=ZqK7YJXic8ceIfQLkH9ZtXFJCFyOuto5Mktz4qLG9ps,20682
+ pystylometry/dialect/README.md,sha256=Bz0oGFRaWXjfZQqlMgvQ75rA9U0E67am2mJ9nWcSBhQ,1089
  pystylometry/dialect/__init__.py,sha256=6S4OKymniuDXPm3ZMqWyy9179RlWoLJoDzkCP4P7Jss,2486
  pystylometry/dialect/_data/dialect_markers.json,sha256=DthluOA6q0rG_8IrCrFIYWh_EMvINqYv7W664sEjNN4,51799
  pystylometry/dialect/_loader.py,sha256=M2ATp-5754v_yX9EWvBP0r5qgNf8xlL8XadVsVb_Hco,12989
  pystylometry/dialect/detector.py,sha256=9x0ZuIfTIjsmdNSx0Ezy5AC0SAFtC4kVw11iOSBd9gQ,20147
- pystylometry/lexical/__init__.py,sha256=HTncnGVZgpktZqpf-r4_HI_9Jq42WkZZKXn8nho3y3s,751
+ pystylometry/lexical/README.md,sha256=cFQ7KRZV4ubsQwIlOH3YHTbhhNl5X91Sr3zcn-3x0HI,1185
+ pystylometry/lexical/__init__.py,sha256=_VpemdfVYZYXHP4ulTItoyegJ-3lE85wlfzDCpseaNE,898
  pystylometry/lexical/advanced_diversity.py,sha256=rL1hlNqTnaEFcA2v4oBJlojHZMTqdvvm4jYXTFGVpYE,25664
  pystylometry/lexical/function_words.py,sha256=eel9bq_qWgWlvG0NtDiouilMt9kaFqz2rh3add2UC4U,17832
  pystylometry/lexical/hapax.py,sha256=djTqZyZIYXa3GRiPoy6TTGHPm0wCRNJ9U0Rwnf5NoDk,12173
  pystylometry/lexical/mtld.py,sha256=XpeCF8sOXZhWbaazHGuqm08mrOf_DYfkfGGAltWnyy4,7101
+ pystylometry/lexical/repetition.py,sha256=A9L0oNwfnCepVkWy57kjHV47Pw4M6fZXEl25hBVdq2s,18318
  pystylometry/lexical/ttr.py,sha256=iEsXkoSPyZEyiiFwKatKA8KhLRukD7RDRvyRkRQOTsk,5848
  pystylometry/lexical/word_frequency_sophistication.py,sha256=OHOS0fBvd1Bz8zsJk-pJbWLTgImmBd-aewQnp_kq8BY,38828
  pystylometry/lexical/yule.py,sha256=NXggha8jmQCu4i-qKZpISwyJBqNpuPHyVR86BLDLgio,5192
+ pystylometry/ngrams/README.md,sha256=50wyaWcLGbosLzTPR1cXdE_xAVU8jVY7fd3ReEk9KnY,802
  pystylometry/ngrams/__init__.py,sha256=eyITmSG4QP1NtVSagPsvc4j6W_E8TdB9wvBvXQHUnwo,379
  pystylometry/ngrams/entropy.py,sha256=i2RzYXrcTTIv6QaUCNQjAahL5LFOctG3ZE1OJ_tY4II,7246
- pystylometry/ngrams/extended_ngrams.py,sha256=OsBHTaaK73ZOhpS_yG2aWip1yWY2Fitdte0qx3wIshc,9475
+ pystylometry/ngrams/extended_ngrams.py,sha256=288nrXbY6-PIJiQ3NaspnuRZ7qWakantnNKvtb5LhWI,18316
+ pystylometry/prosody/README.md,sha256=YNTU0sTnXbCJ9GBPDDfTqHELr4YoF59_bg99ejPiqEE,608
  pystylometry/prosody/__init__.py,sha256=9tiD-U4sqEtUV8n9X339oF_C5tBNingjL-shGBXOrnY,265
- pystylometry/prosody/rhythm_prosody.py,sha256=V9OoxV5d4AZRZAb2HDY7-iEK1ijE7gtHhvFRD2DJvdA,1960
+ pystylometry/prosody/rhythm_prosody.py,sha256=fifKW0FiRwC6xPX1NX0Yr4Il3APNfQiBEXB-uXXgZo8,28697
+ pystylometry/readability/README.md,sha256=jj5I5525WRJceMJR8lECiZb-7y1nFzSK00GSotqupFs,1173
  pystylometry/readability/__init__.py,sha256=bJenjlGpNx7FF5AfOb6VA-wODdIa7Hc9iqoba1DLlh0,637
  pystylometry/readability/additional_formulas.py,sha256=nlVegnn_RRh6TP0BoLWlLBNnAgtFqLqyDsxFN_fUrAg,44993
  pystylometry/readability/ari.py,sha256=_wPl0FjEReLRHN0v4JQbRaU_kbikIxkr9mLO6hmNVyI,6833
@@ -39,17 +50,20 @@ pystylometry/readability/flesch.py,sha256=7kMeqpYnm-oqQGsDw7yJBhFecXB5ZRU9C8P4UK
  pystylometry/readability/gunning_fog.py,sha256=ntV90NUfqSm_84H1jBa2Fhr5DhlkderHLq8_z3khb48,8375
  pystylometry/readability/smog.py,sha256=8hdQQHUR9UBP-02AyZK3TbNhyyE1LQuZmlnVrs5Yvrk,5742
  pystylometry/readability/syllables.py,sha256=U_tO1fmdOh2xyIJVkFooGMhmZs1hqlFPBa9wBjEwLw8,4272
+ pystylometry/stylistic/README.md,sha256=1GBo3AQ8f4ATap723is6pJtgUM9jmLy-hDOTcVWuI48,1020
  pystylometry/stylistic/__init__.py,sha256=nMykFZUCUKj-ZTk5H0OSKn24w6CSVEVIWieNG2B2hhc,581
- pystylometry/stylistic/cohesion_coherence.py,sha256=M_Pqfj0ZfCLDZBKFQCPx7rX9k6mxWFOjIsm1gsLdFyg,1618
- pystylometry/stylistic/genre_register.py,sha256=R32csC0M3eRcnACJNqMsyN-1ucMwdK8Twm5Tsa0Dd4k,1664
- pystylometry/stylistic/markers.py,sha256=s0ybwUZ6_wE064NXL9kQeTLKVeSHScFgZip7zkKYi2U,5134
- pystylometry/stylistic/vocabulary_overlap.py,sha256=TD8Rn32htB6MPHjc9xkr0LepJ6Q9k7f6uJvZt9_5aXA,1717
+ pystylometry/stylistic/cohesion_coherence.py,sha256=9al3AYH2KQ62aluQJQr0pQHcNf1Aec6G8Oa9zux_uZk,23286
+ pystylometry/stylistic/genre_register.py,sha256=4s-TxEBnFB-iog2yIO1RT6D66AQ3ChOjakRmOZzL8LM,41279
+ pystylometry/stylistic/markers.py,sha256=AsuBsq5ZNTGHEp12AEL0mHj9XCJBKf3bwt7JW4H_xKs,24204
+ pystylometry/stylistic/vocabulary_overlap.py,sha256=6ujoiE7TqrCiGEBrBuDeU6sdKSQYAG6IbrYVR3o9lMY,12931
+ pystylometry/syntactic/README.md,sha256=0eQGqQz9MIE024_Oge4pq9LNdi-GmuTuAlz-DrK2jDI,982
  pystylometry/syntactic/__init__.py,sha256=B9qe0R7w9t5x2s2dXygSuvciuEHrScgD3CkxvPWKMPE,391
  pystylometry/syntactic/advanced_syntactic.py,sha256=ygbm7y1hrNJCaIxRCfZsafvt6BInh2iCTY1eWk2PdaE,19195
  pystylometry/syntactic/pos_ratios.py,sha256=lcvtx6tshVG6MpTWivyWnqFsjFXIHK3LCqyg2AL2AjY,7444
  pystylometry/syntactic/sentence_stats.py,sha256=SJg6TYCiT3gs2bXHYuEMSRgzFnxqOCH5q6WyhjXKgH4,4947
  pystylometry/syntactic/sentence_types.py,sha256=xEQPieGqTInCz9BinvItBX5Z_ofQ-BbFwTFNgY0jWx0,18730
  pystylometry/tokenizer.py,sha256=03FEF4kKp72v-ypbtMg8u0WyVJGk3YJx6Nw3SGzyAnA,18166
+ pystylometry/viz/README.md,sha256=mizuBpUzWgJqjC2u9C-Lu4sVDCcTQOgGsarRSkeWPf4,1031
  pystylometry/viz/__init__.py,sha256=3kHMAcJJi8oPhTqUZIRdyf311cdyPOHWaJIUv-w0V04,2219
  pystylometry/viz/drift.py,sha256=r98gQ4s_IlrEuaouxDMyue3cTjGqj10i4IeKC01IuCo,18956
  pystylometry/viz/jsx/__init__.py,sha256=ZCgbpMPhG5PiJ92IkJRrZwrb7RodZB9MyauO0MGgbRM,1107
@@ -57,7 +71,8 @@ pystylometry/viz/jsx/_base.py,sha256=nd7kEc13fUcRMom3A5jqjGyTy-djIeydq2k3oPHZIHY
  pystylometry/viz/jsx/report.py,sha256=DbbHnnNAEi5tmVg4PmiHb17vkBBXujyE4x1CfVBiOBw,25857
  pystylometry/viz/jsx/timeline.py,sha256=hor-xnBa6oVkSqN0AEZUCQFBOB-iTfHSFZHiEfeakPA,30716
  pystylometry/viz/jsx/viewer.py,sha256=3LO49d_2bRf_P-P-2oSKpKx4N8Ugo4oCLb3DtvyNxXI,43716
- pystylometry-1.1.0.dist-info/METADATA,sha256=QEXVX6buqxGwilLuOIvjFT2ZxoPNazUWX1iyhaM8vI8,8348
- pystylometry-1.1.0.dist-info/WHEEL,sha256=3ny-bZhpXrU6vSQ1UPG34FoxZBp3lVcvK0LkgUz6VLk,88
- pystylometry-1.1.0.dist-info/entry_points.txt,sha256=iHOaFXlyiwcQM1LlID2gWGmN4DBLdTSpKGjttU8tgm8,113
- pystylometry-1.1.0.dist-info/RECORD,,
+ pystylometry-1.3.1.dist-info/LICENSE,sha256=vou5JCLAT5nHcsUv-AkjUYAihYfN9mwPDXxV2DHyHBo,1067
+ pystylometry-1.3.1.dist-info/METADATA,sha256=Nn-0-ABq9tykuxWpC79GkhHO71oWLnAseh0z9R3mycs,4813
+ pystylometry-1.3.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+ pystylometry-1.3.1.dist-info/entry_points.txt,sha256=iHOaFXlyiwcQM1LlID2gWGmN4DBLdTSpKGjttU8tgm8,113
+ pystylometry-1.3.1.dist-info/RECORD,,
--- a/pystylometry-1.1.0.dist-info/WHEEL
+++ b/pystylometry-1.3.1.dist-info/WHEEL
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: poetry-core 2.3.0
+ Generator: poetry-core 1.9.0
  Root-Is-Purelib: true
  Tag: py3-none-any