pystylometry 1.0.0__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. pystylometry/README.md +42 -0
  2. pystylometry/__init__.py +45 -3
  3. pystylometry/_types.py +1017 -259
  4. pystylometry/authorship/README.md +21 -0
  5. pystylometry/authorship/__init__.py +28 -4
  6. pystylometry/authorship/additional_methods.py +260 -40
  7. pystylometry/authorship/compression.py +175 -0
  8. pystylometry/authorship/kilgarriff.py +354 -0
  9. pystylometry/character/README.md +17 -0
  10. pystylometry/character/character_metrics.py +267 -179
  11. pystylometry/cli.py +427 -0
  12. pystylometry/consistency/README.md +27 -0
  13. pystylometry/consistency/__init__.py +57 -0
  14. pystylometry/consistency/_thresholds.py +162 -0
  15. pystylometry/consistency/drift.py +549 -0
  16. pystylometry/dialect/README.md +26 -0
  17. pystylometry/dialect/__init__.py +65 -0
  18. pystylometry/dialect/_data/dialect_markers.json +1134 -0
  19. pystylometry/dialect/_loader.py +360 -0
  20. pystylometry/dialect/detector.py +533 -0
  21. pystylometry/lexical/README.md +23 -0
  22. pystylometry/lexical/advanced_diversity.py +61 -22
  23. pystylometry/lexical/function_words.py +255 -56
  24. pystylometry/lexical/hapax.py +182 -52
  25. pystylometry/lexical/mtld.py +108 -26
  26. pystylometry/lexical/ttr.py +76 -10
  27. pystylometry/lexical/word_frequency_sophistication.py +1522 -298
  28. pystylometry/lexical/yule.py +136 -50
  29. pystylometry/ngrams/README.md +18 -0
  30. pystylometry/ngrams/entropy.py +150 -49
  31. pystylometry/ngrams/extended_ngrams.py +314 -69
  32. pystylometry/prosody/README.md +17 -0
  33. pystylometry/prosody/rhythm_prosody.py +773 -11
  34. pystylometry/readability/README.md +23 -0
  35. pystylometry/readability/additional_formulas.py +1887 -762
  36. pystylometry/readability/ari.py +144 -82
  37. pystylometry/readability/coleman_liau.py +136 -109
  38. pystylometry/readability/flesch.py +177 -73
  39. pystylometry/readability/gunning_fog.py +165 -161
  40. pystylometry/readability/smog.py +123 -42
  41. pystylometry/stylistic/README.md +20 -0
  42. pystylometry/stylistic/cohesion_coherence.py +669 -13
  43. pystylometry/stylistic/genre_register.py +1560 -17
  44. pystylometry/stylistic/markers.py +611 -17
  45. pystylometry/stylistic/vocabulary_overlap.py +354 -13
  46. pystylometry/syntactic/README.md +20 -0
  47. pystylometry/syntactic/advanced_syntactic.py +76 -14
  48. pystylometry/syntactic/pos_ratios.py +70 -6
  49. pystylometry/syntactic/sentence_stats.py +55 -12
  50. pystylometry/syntactic/sentence_types.py +71 -15
  51. pystylometry/viz/README.md +27 -0
  52. pystylometry/viz/__init__.py +71 -0
  53. pystylometry/viz/drift.py +589 -0
  54. pystylometry/viz/jsx/__init__.py +31 -0
  55. pystylometry/viz/jsx/_base.py +144 -0
  56. pystylometry/viz/jsx/report.py +677 -0
  57. pystylometry/viz/jsx/timeline.py +716 -0
  58. pystylometry/viz/jsx/viewer.py +1032 -0
  59. pystylometry-1.3.0.dist-info/METADATA +136 -0
  60. pystylometry-1.3.0.dist-info/RECORD +76 -0
  61. {pystylometry-1.0.0.dist-info → pystylometry-1.3.0.dist-info}/WHEEL +1 -1
  62. pystylometry-1.3.0.dist-info/entry_points.txt +4 -0
  63. pystylometry-1.0.0.dist-info/METADATA +0 -275
  64. pystylometry-1.0.0.dist-info/RECORD +0 -46
pystylometry/README.md ADDED
@@ -0,0 +1,42 @@
1
+ # pystylometry
2
+
3
+ ![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue)
4
+ ![License: MIT](https://img.shields.io/badge/license-MIT-green)
5
+
6
+ Core package for stylometric analysis and authorship attribution.
7
+
8
+ ## Module Map
9
+
10
+ | Module | Purpose | Key Functions |
11
+ |--------|---------|---------------|
12
+ | [`lexical/`](lexical/) | Vocabulary diversity & richness | `compute_mtld`, `compute_yule`, `compute_ttr`, `compute_hapax_ratios` |
13
+ | [`readability/`](readability/) | Text readability scoring | `compute_flesch`, `compute_gunning_fog`, `compute_ari`, `compute_smog` |
14
+ | [`syntactic/`](syntactic/) | Sentence & parse structure | `compute_pos_ratios`, `compute_sentence_types`, `compute_advanced_syntactic` |
15
+ | [`authorship/`](authorship/) | Author attribution & comparison | `compute_burrows_delta`, `compute_kilgarriff`, `compute_compression_distance` |
16
+ | [`stylistic/`](stylistic/) | Style markers & vocabulary overlap | `compute_stylistic_markers`, `compute_vocabulary_overlap`, `compute_genre_register` |
17
+ | [`character/`](character/) | Character-level features | `compute_character_metrics` |
18
+ | [`ngrams/`](ngrams/) | N-gram entropy & sequences | `compute_extended_ngrams`, `compute_ngram_entropy` |
19
+ | [`dialect/`](dialect/) | Regional dialect detection | `compute_dialect` |
20
+ | [`consistency/`](consistency/) | Intra-document drift detection | `compute_kilgarriff_drift` |
21
+ | [`prosody/`](prosody/) | Rhythm & stress patterns | `compute_rhythm_prosody` |
22
+ | [`viz/`](viz/) | Visualization (PNG & interactive HTML) | `plot_drift_timeline`, `export_drift_report_jsx` |
23
+
24
+ ## Shared Internals
25
+
26
+ | File | Purpose |
27
+ |------|---------|
28
+ | `_types.py` | All dataclass result types (e.g. `FleschResult`, `MTLDResult`, `KilgarriffDriftResult`) |
29
+ | `_normalize.py` | Text normalization for readability and stylometry pipelines |
30
+ | `_utils.py` | Shared tokenization and helper functions |
31
+ | `tokenizer.py` | Configurable tokenizer with sentence/word splitting |
32
+ | `cli.py` | Command-line interface (`pystylometry analyze`) |
33
+
34
+ ## Installation Extras
35
+
36
+ ```
37
+ pip install pystylometry # Core (lexical only)
38
+ pip install pystylometry[readability] # + readability
39
+ pip install pystylometry[syntactic] # + syntactic (requires spaCy)
40
+ pip install pystylometry[authorship] # + authorship attribution
41
+ pip install pystylometry[all] # Everything
42
+ ```
pystylometry/__init__.py CHANGED
@@ -2,7 +2,7 @@
2
2
  pystylometry - Comprehensive Python package for stylometric analysis.
3
3
 
4
4
  A modular package for text analysis with lexical, readability, syntactic,
5
- authorship, and n-gram metrics.
5
+ authorship, n-gram, dialect detection, and consistency analysis metrics.
6
6
 
7
7
  Installation:
8
8
  pip install pystylometry # Core (lexical only)
@@ -16,7 +16,9 @@ Usage:
16
16
  from pystylometry.lexical import compute_mtld, compute_yule
17
17
  from pystylometry.readability import compute_flesch
18
18
  from pystylometry.syntactic import compute_pos_ratios
19
- from pystylometry.authorship import compute_burrows_delta
19
+ from pystylometry.authorship import compute_burrows_delta, compute_kilgarriff
20
+ from pystylometry.consistency import compute_kilgarriff_drift
21
+ from pystylometry.dialect import compute_dialect
20
22
 
21
23
  # Or use the unified analyze() function
22
24
  from pystylometry import analyze
@@ -24,6 +26,18 @@ Usage:
24
26
  results = analyze(text, lexical=True, readability=True)
25
27
  print(results.lexical['mtld'].mtld_average)
26
28
  print(results.readability['flesch'].reading_ease)
29
+
30
+ # Dialect detection
31
+ result = compute_dialect("The colour of the programme was brilliant.")
32
+ print(result.dialect) # 'british'
33
+ print(result.british_score) # 0.85
34
+
35
+ # Consistency analysis (Style Drift Detector - Issue #36)
36
+ from pystylometry.consistency import compute_kilgarriff_drift
37
+
38
+ result = compute_kilgarriff_drift(long_document)
39
+ print(result.pattern) # 'consistent', 'sudden_spike', 'suspiciously_uniform', etc.
40
+ print(result.pattern_confidence)
27
41
  """
28
42
 
29
43
  from ._types import AnalysisResult
@@ -49,14 +63,28 @@ try:
49
63
  except ImportError:
50
64
  _SYNTACTIC_AVAILABLE = False
51
65
 
52
- # Authorship and ngrams use only stdlib (no external dependencies)
66
+ # Prosody requires pronouncing (CMU dictionary) - same dependency as readability
67
+ try:
68
+ from . import prosody # noqa: F401 - Rhythm and prosody metrics (Issue #25)
69
+
70
+ _PROSODY_AVAILABLE = True
71
+ except ImportError:
72
+ _PROSODY_AVAILABLE = False
73
+
74
+ # Authorship, ngrams, dialect, consistency, and stylistic use only stdlib (no external dependencies)
53
75
  from . import (
54
76
  authorship, # noqa: F401
77
+ consistency, # noqa: F401 - Style drift detection (Issue #36)
78
+ dialect, # noqa: F401
55
79
  ngrams, # noqa: F401
80
+ stylistic, # noqa: F401 - Vocabulary overlap and similarity (Issue #21)
56
81
  )
57
82
 
58
83
  _AUTHORSHIP_AVAILABLE = True
59
84
  _NGRAMS_AVAILABLE = True
85
+ _DIALECT_AVAILABLE = True
86
+ _CONSISTENCY_AVAILABLE = True
87
+ _STYLISTIC_AVAILABLE = True
60
88
 
61
89
 
62
90
  def analyze(
@@ -177,6 +205,8 @@ def get_available_modules() -> dict[str, bool]:
177
205
  >>> available = get_available_modules()
178
206
  >>> if available['readability']:
179
207
  ... from pystylometry.readability import compute_flesch
208
+ >>> if available['consistency']:
209
+ ... from pystylometry.consistency import compute_kilgarriff_drift
180
210
  """
181
211
  return {
182
212
  "lexical": True, # Always available
@@ -184,6 +214,10 @@ def get_available_modules() -> dict[str, bool]:
184
214
  "syntactic": _SYNTACTIC_AVAILABLE,
185
215
  "authorship": _AUTHORSHIP_AVAILABLE,
186
216
  "ngrams": _NGRAMS_AVAILABLE,
217
+ "dialect": _DIALECT_AVAILABLE,
218
+ "consistency": _CONSISTENCY_AVAILABLE, # Style drift detection (Issue #36)
219
+ "stylistic": _STYLISTIC_AVAILABLE, # Vocabulary overlap (Issue #21)
220
+ "prosody": _PROSODY_AVAILABLE, # Rhythm and prosody (Issue #25)
187
221
  }
188
222
 
189
223
 
@@ -203,3 +237,11 @@ if _AUTHORSHIP_AVAILABLE:
203
237
  __all__.append("authorship")
204
238
  if _NGRAMS_AVAILABLE:
205
239
  __all__.append("ngrams")
240
+ if _DIALECT_AVAILABLE:
241
+ __all__.append("dialect")
242
+ if _CONSISTENCY_AVAILABLE:
243
+ __all__.append("consistency")
244
+ if _STYLISTIC_AVAILABLE:
245
+ __all__.append("stylistic")
246
+ if _PROSODY_AVAILABLE:
247
+ __all__.append("prosody")