pystylometry-1.0.0-py3-none-any.whl → pystylometry-1.1.0-py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (44)
  1. pystylometry/__init__.py +29 -3
  2. pystylometry/_types.py +963 -259
  3. pystylometry/authorship/__init__.py +23 -2
  4. pystylometry/authorship/additional_methods.py +4 -29
  5. pystylometry/authorship/kilgarriff.py +347 -0
  6. pystylometry/character/character_metrics.py +267 -179
  7. pystylometry/cli.py +427 -0
  8. pystylometry/consistency/__init__.py +57 -0
  9. pystylometry/consistency/_thresholds.py +162 -0
  10. pystylometry/consistency/drift.py +549 -0
  11. pystylometry/dialect/__init__.py +65 -0
  12. pystylometry/dialect/_data/dialect_markers.json +1134 -0
  13. pystylometry/dialect/_loader.py +360 -0
  14. pystylometry/dialect/detector.py +533 -0
  15. pystylometry/lexical/advanced_diversity.py +61 -22
  16. pystylometry/lexical/function_words.py +255 -56
  17. pystylometry/lexical/hapax.py +182 -52
  18. pystylometry/lexical/mtld.py +108 -26
  19. pystylometry/lexical/ttr.py +76 -10
  20. pystylometry/lexical/word_frequency_sophistication.py +1522 -298
  21. pystylometry/lexical/yule.py +136 -50
  22. pystylometry/ngrams/entropy.py +150 -49
  23. pystylometry/readability/additional_formulas.py +1887 -762
  24. pystylometry/readability/ari.py +144 -82
  25. pystylometry/readability/coleman_liau.py +136 -109
  26. pystylometry/readability/flesch.py +177 -73
  27. pystylometry/readability/gunning_fog.py +165 -161
  28. pystylometry/readability/smog.py +123 -42
  29. pystylometry/syntactic/advanced_syntactic.py +76 -14
  30. pystylometry/syntactic/pos_ratios.py +70 -6
  31. pystylometry/syntactic/sentence_stats.py +55 -12
  32. pystylometry/syntactic/sentence_types.py +71 -15
  33. pystylometry/viz/__init__.py +71 -0
  34. pystylometry/viz/drift.py +589 -0
  35. pystylometry/viz/jsx/__init__.py +31 -0
  36. pystylometry/viz/jsx/_base.py +144 -0
  37. pystylometry/viz/jsx/report.py +677 -0
  38. pystylometry/viz/jsx/timeline.py +716 -0
  39. pystylometry/viz/jsx/viewer.py +1032 -0
  40. {pystylometry-1.0.0.dist-info → pystylometry-1.1.0.dist-info}/METADATA +5 -2
  41. pystylometry-1.1.0.dist-info/RECORD +63 -0
  42. {pystylometry-1.0.0.dist-info → pystylometry-1.1.0.dist-info}/WHEEL +1 -1
  43. pystylometry-1.1.0.dist-info/entry_points.txt +4 -0
  44. pystylometry-1.0.0.dist-info/RECORD +0 -46
pystylometry/__init__.py CHANGED
@@ -2,7 +2,7 @@
2
2
  pystylometry - Comprehensive Python package for stylometric analysis.
3
3
 
4
4
  A modular package for text analysis with lexical, readability, syntactic,
5
- authorship, and n-gram metrics.
5
+ authorship, n-gram, dialect detection, and consistency analysis metrics.
6
6
 
7
7
  Installation:
8
8
  pip install pystylometry # Core (lexical only)
@@ -16,7 +16,9 @@ Usage:
16
16
  from pystylometry.lexical import compute_mtld, compute_yule
17
17
  from pystylometry.readability import compute_flesch
18
18
  from pystylometry.syntactic import compute_pos_ratios
19
- from pystylometry.authorship import compute_burrows_delta
19
+ from pystylometry.authorship import compute_burrows_delta, compute_kilgarriff
20
+ from pystylometry.consistency import compute_kilgarriff_drift
21
+ from pystylometry.dialect import compute_dialect
20
22
 
21
23
  # Or use the unified analyze() function
22
24
  from pystylometry import analyze
@@ -24,6 +26,18 @@ Usage:
24
26
  results = analyze(text, lexical=True, readability=True)
25
27
  print(results.lexical['mtld'].mtld_average)
26
28
  print(results.readability['flesch'].reading_ease)
29
+
30
+ # Dialect detection
31
+ result = compute_dialect("The colour of the programme was brilliant.")
32
+ print(result.dialect) # 'british'
33
+ print(result.british_score) # 0.85
34
+
35
+ # Consistency analysis (Style Drift Detector - Issue #36)
36
+ from pystylometry.consistency import compute_kilgarriff_drift
37
+
38
+ result = compute_kilgarriff_drift(long_document)
39
+ print(result.pattern) # 'consistent', 'sudden_spike', 'suspiciously_uniform', etc.
40
+ print(result.pattern_confidence)
27
41
  """
28
42
 
29
43
  from ._types import AnalysisResult
@@ -49,14 +63,18 @@ try:
49
63
  except ImportError:
50
64
  _SYNTACTIC_AVAILABLE = False
51
65
 
52
- # Authorship and ngrams use only stdlib (no external dependencies)
66
+ # Authorship, ngrams, dialect, and consistency use only stdlib (no external dependencies)
53
67
  from . import (
54
68
  authorship, # noqa: F401
69
+ consistency, # noqa: F401 - Style drift detection (Issue #36)
70
+ dialect, # noqa: F401
55
71
  ngrams, # noqa: F401
56
72
  )
57
73
 
58
74
  _AUTHORSHIP_AVAILABLE = True
59
75
  _NGRAMS_AVAILABLE = True
76
+ _DIALECT_AVAILABLE = True
77
+ _CONSISTENCY_AVAILABLE = True
60
78
 
61
79
 
62
80
  def analyze(
@@ -177,6 +195,8 @@ def get_available_modules() -> dict[str, bool]:
177
195
  >>> available = get_available_modules()
178
196
  >>> if available['readability']:
179
197
  ... from pystylometry.readability import compute_flesch
198
+ >>> if available['consistency']:
199
+ ... from pystylometry.consistency import compute_kilgarriff_drift
180
200
  """
181
201
  return {
182
202
  "lexical": True, # Always available
@@ -184,6 +204,8 @@ def get_available_modules() -> dict[str, bool]:
184
204
  "syntactic": _SYNTACTIC_AVAILABLE,
185
205
  "authorship": _AUTHORSHIP_AVAILABLE,
186
206
  "ngrams": _NGRAMS_AVAILABLE,
207
+ "dialect": _DIALECT_AVAILABLE,
208
+ "consistency": _CONSISTENCY_AVAILABLE, # Style drift detection (Issue #36)
187
209
  }
188
210
 
189
211
 
@@ -203,3 +225,7 @@ if _AUTHORSHIP_AVAILABLE:
203
225
  __all__.append("authorship")
204
226
  if _NGRAMS_AVAILABLE:
205
227
  __all__.append("ngrams")
228
+ if _DIALECT_AVAILABLE:
229
+ __all__.append("dialect")
230
+ if _CONSISTENCY_AVAILABLE:
231
+ __all__.append("consistency")