PyPI - pystylometry - Versions diffs - 1.0.0__py3-none-any.whl → 1.3.0__py3-none-any.whl - Mend

pystylometry 1.0.0 (py3-none-any.whl) → 1.3.0 (py3-none-any.whl)

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (64)

pystylometry/README.md +42 -0
pystylometry/__init__.py +45 -3
pystylometry/_types.py +1017 -259
pystylometry/authorship/README.md +21 -0
pystylometry/authorship/__init__.py +28 -4
pystylometry/authorship/additional_methods.py +260 -40
pystylometry/authorship/compression.py +175 -0
pystylometry/authorship/kilgarriff.py +354 -0
pystylometry/character/README.md +17 -0
pystylometry/character/character_metrics.py +267 -179
pystylometry/cli.py +427 -0
pystylometry/consistency/README.md +27 -0
pystylometry/consistency/__init__.py +57 -0
pystylometry/consistency/_thresholds.py +162 -0
pystylometry/consistency/drift.py +549 -0
pystylometry/dialect/README.md +26 -0
pystylometry/dialect/__init__.py +65 -0
pystylometry/dialect/_data/dialect_markers.json +1134 -0
pystylometry/dialect/_loader.py +360 -0
pystylometry/dialect/detector.py +533 -0
pystylometry/lexical/README.md +23 -0
pystylometry/lexical/advanced_diversity.py +61 -22
pystylometry/lexical/function_words.py +255 -56
pystylometry/lexical/hapax.py +182 -52
pystylometry/lexical/mtld.py +108 -26
pystylometry/lexical/ttr.py +76 -10
pystylometry/lexical/word_frequency_sophistication.py +1522 -298
pystylometry/lexical/yule.py +136 -50
pystylometry/ngrams/README.md +18 -0
pystylometry/ngrams/entropy.py +150 -49
pystylometry/ngrams/extended_ngrams.py +314 -69
pystylometry/prosody/README.md +17 -0
pystylometry/prosody/rhythm_prosody.py +773 -11
pystylometry/readability/README.md +23 -0
pystylometry/readability/additional_formulas.py +1887 -762
pystylometry/readability/ari.py +144 -82
pystylometry/readability/coleman_liau.py +136 -109
pystylometry/readability/flesch.py +177 -73
pystylometry/readability/gunning_fog.py +165 -161
pystylometry/readability/smog.py +123 -42
pystylometry/stylistic/README.md +20 -0
pystylometry/stylistic/cohesion_coherence.py +669 -13
pystylometry/stylistic/genre_register.py +1560 -17
pystylometry/stylistic/markers.py +611 -17
pystylometry/stylistic/vocabulary_overlap.py +354 -13
pystylometry/syntactic/README.md +20 -0
pystylometry/syntactic/advanced_syntactic.py +76 -14
pystylometry/syntactic/pos_ratios.py +70 -6
pystylometry/syntactic/sentence_stats.py +55 -12
pystylometry/syntactic/sentence_types.py +71 -15
pystylometry/viz/README.md +27 -0
pystylometry/viz/__init__.py +71 -0
pystylometry/viz/drift.py +589 -0
pystylometry/viz/jsx/__init__.py +31 -0
pystylometry/viz/jsx/_base.py +144 -0
pystylometry/viz/jsx/report.py +677 -0
pystylometry/viz/jsx/timeline.py +716 -0
pystylometry/viz/jsx/viewer.py +1032 -0
pystylometry-1.3.0.dist-info/METADATA +136 -0
pystylometry-1.3.0.dist-info/RECORD +76 -0
{pystylometry-1.0.0.dist-info → pystylometry-1.3.0.dist-info}/WHEEL +1 -1
pystylometry-1.3.0.dist-info/entry_points.txt +4 -0
pystylometry-1.0.0.dist-info/METADATA +0 -275
pystylometry-1.0.0.dist-info/RECORD +0 -46

pystylometry/README.md ADDED Viewed

@@ -0,0 +1,42 @@
+# pystylometry
+![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue)
+![License: MIT](https://img.shields.io/badge/license-MIT-green)
+Core package for stylometric analysis and authorship attribution.
+## Module Map
+| Module | Purpose | Key Functions |
+|--------|---------|---------------|
+| [`lexical/`](lexical/) | Vocabulary diversity & richness | `compute_mtld`, `compute_yule`, `compute_ttr`, `compute_hapax_ratios` |
+| [`readability/`](readability/) | Text readability scoring | `compute_flesch`, `compute_gunning_fog`, `compute_ari`, `compute_smog` |
+| [`syntactic/`](syntactic/) | Sentence & parse structure | `compute_pos_ratios`, `compute_sentence_types`, `compute_advanced_syntactic` |
+| [`authorship/`](authorship/) | Author attribution & comparison | `compute_burrows_delta`, `compute_kilgarriff`, `compute_compression_distance` |
+| [`stylistic/`](stylistic/) | Style markers & vocabulary overlap | `compute_stylistic_markers`, `compute_vocabulary_overlap`, `compute_genre_register` |
+| [`character/`](character/) | Character-level features | `compute_character_metrics` |
+| [`ngrams/`](ngrams/) | N-gram entropy & sequences | `compute_extended_ngrams`, `compute_ngram_entropy` |
+| [`dialect/`](dialect/) | Regional dialect detection | `compute_dialect` |
+| [`consistency/`](consistency/) | Intra-document drift detection | `compute_kilgarriff_drift` |
+| [`prosody/`](prosody/) | Rhythm & stress patterns | `compute_rhythm_prosody` |
+| [`viz/`](viz/) | Visualization (PNG & interactive HTML) | `plot_drift_timeline`, `export_drift_report_jsx` |
+## Shared Internals
+| File | Purpose |
+|------|---------|
+| `_types.py` | All dataclass result types (e.g. `FleschResult`, `MTLDResult`, `KilgarriffDriftResult`) |
+| `_normalize.py` | Text normalization for readability and stylometry pipelines |
+| `_utils.py` | Shared tokenization and helper functions |
+| `tokenizer.py` | Configurable tokenizer with sentence/word splitting |
+| `cli.py` | Command-line interface (`pystylometry analyze`) |
+## Installation Extras
+```
+pip install pystylometry                  # Core (lexical only)
+pip install pystylometry[readability]     # + readability
+pip install pystylometry[syntactic]       # + syntactic (requires spaCy)
+pip install pystylometry[authorship]      # + authorship attribution
+pip install pystylometry[all]             # Everything
+```

pystylometry/__init__.py CHANGED Viewed

@@ -2,7 +2,7 @@
 pystylometry - Comprehensive Python package for stylometric analysis.
 A modular package for text analysis with lexical, readability, syntactic,
-authorship, and n-gram metrics.
+authorship, n-gram, dialect detection, and consistency analysis metrics.
 Installation:
     pip install pystylometry                    # Core (lexical only)
@@ -16,7 +16,9 @@ Usage:
     from pystylometry.lexical import compute_mtld, compute_yule
     from pystylometry.readability import compute_flesch
     from pystylometry.syntactic import compute_pos_ratios
-    from pystylometry.authorship import compute_burrows_delta
+    from pystylometry.authorship import compute_burrows_delta, compute_kilgarriff
+    from pystylometry.consistency import compute_kilgarriff_drift
+    from pystylometry.dialect import compute_dialect
     # Or use the unified analyze() function
     from pystylometry import analyze
@@ -24,6 +26,18 @@ Usage:
     results = analyze(text, lexical=True, readability=True)
     print(results.lexical['mtld'].mtld_average)
     print(results.readability['flesch'].reading_ease)
+    # Dialect detection
+    result = compute_dialect("The colour of the programme was brilliant.")
+    print(result.dialect)  # 'british'
+    print(result.british_score)  # 0.85
+    # Consistency analysis (Style Drift Detector - Issue #36)
+    from pystylometry.consistency import compute_kilgarriff_drift
+    result = compute_kilgarriff_drift(long_document)
+    print(result.pattern)  # 'consistent', 'sudden_spike', 'suspiciously_uniform', etc.
+    print(result.pattern_confidence)
 """
 from ._types import AnalysisResult
@@ -49,14 +63,28 @@ try:
 except ImportError:
     _SYNTACTIC_AVAILABLE = False
-# Authorship and ngrams use only stdlib (no external dependencies)
+# Prosody requires pronouncing (CMU dictionary) - same dependency as readability
+try:
+    from . import prosody  # noqa: F401 - Rhythm and prosody metrics (Issue #25)
+    _PROSODY_AVAILABLE = True
+except ImportError:
+    _PROSODY_AVAILABLE = False
+# Authorship, ngrams, dialect, consistency, and stylistic use only stdlib (no external dependencies)
 from . import (
     authorship,  # noqa: F401
+    consistency,  # noqa: F401 - Style drift detection (Issue #36)
+    dialect,  # noqa: F401
     ngrams,  # noqa: F401
+    stylistic,  # noqa: F401 - Vocabulary overlap and similarity (Issue #21)
 )
 _AUTHORSHIP_AVAILABLE = True
 _NGRAMS_AVAILABLE = True
+_DIALECT_AVAILABLE = True
+_CONSISTENCY_AVAILABLE = True
+_STYLISTIC_AVAILABLE = True
 def analyze(
@@ -177,6 +205,8 @@ def get_available_modules() -> dict[str, bool]:
         >>> available = get_available_modules()
         >>> if available['readability']:
         ...     from pystylometry.readability import compute_flesch
+        >>> if available['consistency']:
+        ...     from pystylometry.consistency import compute_kilgarriff_drift
     """
     return {
         "lexical": True,  # Always available
@@ -184,6 +214,10 @@ def get_available_modules() -> dict[str, bool]:
         "syntactic": _SYNTACTIC_AVAILABLE,
         "authorship": _AUTHORSHIP_AVAILABLE,
         "ngrams": _NGRAMS_AVAILABLE,
+        "dialect": _DIALECT_AVAILABLE,
+        "consistency": _CONSISTENCY_AVAILABLE,  # Style drift detection (Issue #36)
+        "stylistic": _STYLISTIC_AVAILABLE,  # Vocabulary overlap (Issue #21)
+        "prosody": _PROSODY_AVAILABLE,  # Rhythm and prosody (Issue #25)
     }
@@ -203,3 +237,11 @@ if _AUTHORSHIP_AVAILABLE:
     __all__.append("authorship")
 if _NGRAMS_AVAILABLE:
     __all__.append("ngrams")
+if _DIALECT_AVAILABLE:
+    __all__.append("dialect")
+if _CONSISTENCY_AVAILABLE:
+    __all__.append("consistency")
+if _STYLISTIC_AVAILABLE:
+    __all__.append("stylistic")
+if _PROSODY_AVAILABLE:
+    __all__.append("prosody")

pystylometry 1.0.0__py3-none-any.whl → 1.3.0__py3-none-any.whl

pystylometry 1.0.0 (py3-none-any.whl) → 1.3.0 (py3-none-any.whl)