pystylometry 1.3.1-py3-none-any.whl → 1.3.5-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in the public registry. It is provided for informational purposes only.
pystylometry/__init__.py CHANGED
@@ -40,14 +40,13 @@ Usage:
     print(result.pattern_confidence)
 """

+from . import lexical  # noqa: E402
 from ._types import AnalysisResult
+from .tokenizer import TokenizationStats, Tokenizer, TokenMetadata

 # Version
 __version__ = "0.1.0"

-# Core exports - always available
-from . import lexical
-
 # Optional exports - may raise ImportError if dependencies not installed
 try:
     from . import readability  # noqa: F401
@@ -87,6 +86,41 @@ _CONSISTENCY_AVAILABLE = True
 _STYLISTIC_AVAILABLE = True


+def tokenize(text: str, **kwargs: object) -> list[str]:
+    """Tokenize text using the stylometric tokenizer.
+
+    Convenience wrapper around Tokenizer.tokenize(). All keyword arguments
+    are forwarded to the Tokenizer constructor.
+
+    Args:
+        text: Input text to tokenize.
+        **kwargs: Options forwarded to Tokenizer (lowercase, strip_numbers,
+            expand_contractions, etc.).
+
+    Returns:
+        List of token strings.
+
+    Example:
+        >>> from pystylometry import tokenize
+        >>> tokenize("Hello, world! It's a test.")
+        ['hello', 'world', "it's", 'a', 'test']
+    """
+    return Tokenizer(**kwargs).tokenize(text)  # type: ignore[arg-type]
+
+
+def tokenize_with_metadata(text: str, **kwargs: object) -> list[TokenMetadata]:
+    """Tokenize text and return tokens with positional and type metadata.
+
+    Args:
+        text: Input text to tokenize.
+        **kwargs: Options forwarded to Tokenizer.
+
+    Returns:
+        List of TokenMetadata objects.
+    """
+    return Tokenizer(**kwargs).tokenize_with_metadata(text)  # type: ignore[arg-type]
+
+
 def analyze(
     text: str,
     lexical_metrics: bool = True,
@@ -225,6 +259,11 @@ __all__ = [
     "__version__",
     "analyze",
     "get_available_modules",
+    "tokenize",
+    "tokenize_with_metadata",
+    "Tokenizer",
+    "TokenMetadata",
+    "TokenizationStats",
     "lexical",
 ]
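A minimal usage sketch of the two helpers this release adds at the package top level. The call signatures and the metadata fields (token, start, end, token_type) are taken from the hunks in this diff; the keyword argument shown is one of the options the docstring says is forwarded to Tokenizer, and the output is illustrative rather than verified against 1.3.5.

    from pystylometry import tokenize, tokenize_with_metadata

    # Plain token list; keyword arguments are forwarded to the Tokenizer constructor
    tokens = tokenize("Hello, world! It's a test.", expand_contractions=True)

    # Tokens plus positional and type metadata, the same fields the new
    # pystylometry-tokenize CLI serialises to JSON
    for meta in tokenize_with_metadata("Hello, world!"):
        print(meta.token, meta.start, meta.end, meta.token_type)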
pystylometry/_types.py CHANGED
@@ -23,7 +23,7 @@ from __future__ import annotations

 import statistics
 from dataclasses import dataclass
-from typing import Any
+from typing import Any, Optional

 # ===== Distribution and Chunking =====
 # Related to GitHub Issue #27: Native chunked analysis with Distribution dataclass
@@ -316,8 +316,8 @@ class HapaxLexiconResult:
 class TTRResult:
     """Result from Type-Token Ratio (TTR) analysis.

-    Wraps stylometry-ttr package functionality to measure vocabulary richness
-    through the ratio of unique words (types) to total words (tokens).
+    Measures vocabulary richness through the ratio of unique words (types)
+    to total words (tokens).

     All numeric metrics include both a mean value (convenient access) and
     a full distribution with per-chunk values and statistics.
@@ -370,6 +370,56 @@ class TTRResult:
     metadata: dict[str, Any]


+@dataclass
+class TTRAggregateResult:
+    """Aggregated TTR statistics for a collection of texts.
+
+    Computes group-level summary statistics (mean, std, min, max, median)
+    across multiple ``TTRResult`` objects. Useful for comparative analysis
+    across authors, genres, or time periods.
+
+    Related GitHub Issue:
+        #43 - Inline stylometry-ttr into pystylometry (remove external dependency)
+        https://github.com/craigtrim/pystylometry/issues/43
+
+    Example:
+        >>> from pystylometry.lexical import compute_ttr, TTRAggregator
+        >>> results = [compute_ttr(t) for t in texts]
+        >>> agg = TTRAggregator()
+        >>> stats = agg.aggregate(results, group_id="Austen")
+        >>> stats.ttr_mean
+        0.412
+    """
+
+    group_id: str
+    text_count: int
+    total_words: int
+
+    # Raw TTR statistics
+    ttr_mean: float
+    ttr_std: float
+    ttr_min: float
+    ttr_max: float
+    ttr_median: float
+
+    # Root TTR (Guiraud's index) statistics
+    root_ttr_mean: float
+    root_ttr_std: float
+
+    # Log TTR (Herdan's C) statistics
+    log_ttr_mean: float
+    log_ttr_std: float
+
+    # STTR statistics (None if no texts had enough words for STTR)
+    sttr_mean: Optional[float]
+    sttr_std: Optional[float]
+
+    # Delta std mean (None if no texts had delta metrics)
+    delta_std_mean: Optional[float]
+
+    metadata: dict[str, Any]
+
+
 # ===== Repetition Detection Results =====
 # Related to GitHub Issue #28: Verbal tics detection for slop analysis
 # https://github.com/craigtrim/pystylometry/issues/28
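The new dataclass is populated by TTRAggregator.aggregate (shown in the ttr.py hunk further down). As a rough sketch of how the aggregate fields relate to per-text results, assuming a hypothetical list of TTRResult objects named results:

    import statistics

    ttrs = [r.ttr for r in results]                 # one raw TTR per text
    ttr_mean = statistics.mean(ttrs)                # -> TTRAggregateResult.ttr_mean
    ttr_std = statistics.stdev(ttrs) if len(ttrs) > 1 else 0.0
    ttr_median = statistics.median(ttrs)

    # The Optional fields stay None unless at least one text qualifies:
    # only results whose metadata flags sttr_available contribute to sttr_mean
    sttrs = [r.sttr for r in results if r.metadata.get("sttr_available")]
    sttr_mean = statistics.mean(sttrs) if sttrs else None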
pystylometry/cli.py CHANGED
@@ -3,6 +3,7 @@
 Usage:
     pystylometry-drift <file> [--window-size=N] [--stride=N] [--mode=MODE] [--json]
     pystylometry-drift <file> --plot [output.png]
+    pystylometry-tokenize <file> [--json] [--metadata] [--stats]

 Example:
     pystylometry-drift manuscript.txt
@@ -10,6 +11,9 @@ Example:
     pystylometry-drift manuscript.txt --json
     pystylometry-drift manuscript.txt --plot
     pystylometry-drift manuscript.txt --plot drift_report.png
+    pystylometry-tokenize manuscript.txt
+    pystylometry-tokenize manuscript.txt --json --metadata
+    pystylometry-tokenize manuscript.txt --stats
 """

 from __future__ import annotations
@@ -423,5 +427,322 @@ The generated viewer includes:
     print()


+def tokenize_cli() -> None:
+    """CLI entry point for stylometric tokenization."""
+    parser = argparse.ArgumentParser(
+        prog="pystylometry-tokenize",
+        description="Tokenize text for stylometric analysis.",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  pystylometry-tokenize manuscript.txt
+  pystylometry-tokenize manuscript.txt --json
+  pystylometry-tokenize manuscript.txt --json --metadata
+  pystylometry-tokenize manuscript.txt --stats
+  pystylometry-tokenize manuscript.txt -U --expand-contractions
+  pystylometry-tokenize manuscript.txt --min-length 3 --strip-numbers
+        """,
+    )
+
+    parser.add_argument(
+        "file",
+        type=Path,
+        help="Path to text file to tokenize",
+    )
+
+    # Output mode
+    output_group = parser.add_argument_group("output")
+    output_group.add_argument(
+        "-j",
+        "--json",
+        action="store_true",
+        help="Output as JSON (list of strings, or list of objects with --metadata)",
+    )
+    output_group.add_argument(
+        "-m",
+        "--metadata",
+        action="store_true",
+        help="Include token type and position metadata (implies --json)",
+    )
+    output_group.add_argument(
+        "-s",
+        "--stats",
+        action="store_true",
+        help="Show tokenization statistics instead of tokens",
+    )
+
+    # Core behavior
+    behavior_group = parser.add_argument_group("behavior")
+    behavior_group.add_argument(
+        "-U",
+        "--no-lowercase",
+        action="store_true",
+        help="Preserve original case (default: lowercase)",
+    )
+    behavior_group.add_argument(
+        "-e",
+        "--expand-contractions",
+        action="store_true",
+        help="Expand contractions (it's -> it is)",
+    )
+    behavior_group.add_argument(
+        "-n",
+        "--strip-numbers",
+        action="store_true",
+        help="Remove numeric tokens",
+    )
+    behavior_group.add_argument(
+        "--keep-punctuation",
+        action="store_true",
+        help="Keep punctuation tokens (default: stripped)",
+    )
+
+    # Filtering
+    filter_group = parser.add_argument_group("filtering")
+    filter_group.add_argument(
+        "--min-length",
+        type=int,
+        default=1,
+        metavar="N",
+        help="Minimum token length (default: 1)",
+    )
+    filter_group.add_argument(
+        "--max-length",
+        type=int,
+        default=None,
+        metavar="N",
+        help="Maximum token length (default: unlimited)",
+    )
+    filter_group.add_argument(
+        "--preserve-urls",
+        action="store_true",
+        help="Keep URL tokens",
+    )
+    filter_group.add_argument(
+        "--preserve-emails",
+        action="store_true",
+        help="Keep email tokens",
+    )
+    filter_group.add_argument(
+        "--preserve-hashtags",
+        action="store_true",
+        help="Keep hashtag tokens",
+    )
+    filter_group.add_argument(
+        "--preserve-mentions",
+        action="store_true",
+        help="Keep @mention tokens",
+    )
+
+    # Advanced
+    advanced_group = parser.add_argument_group("advanced")
+    advanced_group.add_argument(
+        "--expand-abbreviations",
+        action="store_true",
+        help="Expand abbreviations (Dr. -> Doctor)",
+    )
+    advanced_group.add_argument(
+        "--strip-accents",
+        action="store_true",
+        help="Remove accents from characters",
+    )
+    advanced_group.add_argument(
+        "--no-clean",
+        action="store_true",
+        help="Skip text cleaning (italics, brackets, page markers)",
+    )
+    advanced_group.add_argument(
+        "--no-unicode-normalize",
+        action="store_true",
+        help="Skip unicode normalization",
+    )
+
+    args = parser.parse_args()
+
+    # --- ANSI colors ---
+    use_color = sys.stderr.isatty()
+
+    def _c(code: str, text: str) -> str:
+        return f"\033[{code}m{text}\033[0m" if use_color else text
+
+    bold = lambda t: _c("1", t)  # noqa: E731
+    dim = lambda t: _c("2", t)  # noqa: E731
+    cyan = lambda t: _c("36", t)  # noqa: E731
+    green = lambda t: _c("32", t)  # noqa: E731
+    yellow = lambda t: _c("33", t)  # noqa: E731
+
+    # --- Validate file ---
+    if not args.file.exists():
+        print(f"Error: File not found: {args.file}", file=sys.stderr)
+        sys.exit(1)
+
+    try:
+        text = args.file.read_text(encoding="utf-8")
+    except Exception as e:
+        print(f"Error reading file: {e}", file=sys.stderr)
+        sys.exit(1)
+
+    # --- Build Tokenizer kwargs ---
+    tokenizer_kwargs = {
+        "lowercase": not args.no_lowercase,
+        "min_length": args.min_length,
+        "max_length": args.max_length,
+        "strip_numbers": args.strip_numbers,
+        "strip_punctuation": not args.keep_punctuation,
+        "preserve_urls": args.preserve_urls,
+        "preserve_emails": args.preserve_emails,
+        "preserve_hashtags": args.preserve_hashtags,
+        "preserve_mentions": args.preserve_mentions,
+        "expand_contractions": args.expand_contractions,
+        "expand_abbreviations": args.expand_abbreviations,
+        "strip_accents": args.strip_accents,
+        "normalize_unicode": not args.no_unicode_normalize,
+        "clean_text": not args.no_clean,
+    }
+
+    # Collect active options for banner
+    active_opts = []
+    if args.no_lowercase:
+        active_opts.append("preserve case")
+    if args.expand_contractions:
+        active_opts.append("expand contractions")
+    if args.expand_abbreviations:
+        active_opts.append("expand abbreviations")
+    if args.strip_numbers:
+        active_opts.append("strip numbers")
+    if args.keep_punctuation:
+        active_opts.append("keep punctuation")
+    if args.strip_accents:
+        active_opts.append("strip accents")
+    if args.no_clean:
+        active_opts.append("skip cleaning")
+    if args.no_unicode_normalize:
+        active_opts.append("skip unicode normalization")
+    if args.preserve_urls:
+        active_opts.append("preserve URLs")
+    if args.preserve_emails:
+        active_opts.append("preserve emails")
+    if args.preserve_hashtags:
+        active_opts.append("preserve hashtags")
+    if args.preserve_mentions:
+        active_opts.append("preserve mentions")
+    if args.min_length > 1:
+        active_opts.append(f"min length {args.min_length}")
+    if args.max_length is not None:
+        active_opts.append(f"max length {args.max_length}")
+
+    # Determine output format
+    if args.stats:
+        output_format = "Statistics"
+    elif args.metadata:
+        output_format = "JSON (with metadata)"
+    elif args.json:
+        output_format = "JSON"
+    else:
+        output_format = "One token per line"
+
+    # --- Banner (to stderr so stdout stays pipeable) ---
+    char_count = len(text)
+    line_count = text.count("\n") + 1
+
+    banner = sys.stderr
+    print(file=banner)
+    print(f" {bold('PYSTYLOMETRY')} {dim('—')} {cyan('Stylometric Tokenizer')}", file=banner)
+    print(f" {dim('═' * 71)}", file=banner)
+    print(file=banner)
+    print(f" {bold('INPUT')}", file=banner)
+    print(f" {dim('─' * 71)}", file=banner)
+    print(f" File: {args.file}", file=banner)
+    print(f" Size: {char_count:,} characters / {line_count:,} lines", file=banner)
+    print(file=banner)
+    print(f" {bold('CONFIGURATION')}", file=banner)
+    print(f" {dim('─' * 71)}", file=banner)
+    print(f" Case: {'preserve' if args.no_lowercase else 'lowercase'}", file=banner)
+    print(
+        f" Punctuation: {'keep' if args.keep_punctuation else 'strip'}",
+        file=banner,
+    )
+    print(
+        f" Contractions: {'expand' if args.expand_contractions else 'preserve'}",
+        file=banner,
+    )
+    print(f" Numbers: {'strip' if args.strip_numbers else 'keep'}", file=banner)
+    if active_opts:
+        print(f" Active options: {', '.join(active_opts)}", file=banner)
+    print(file=banner)
+    print(f" {bold('OUTPUT')}", file=banner)
+    print(f" {dim('─' * 71)}", file=banner)
+    print(f" Format: {output_format}", file=banner)
+    print(file=banner)
+
+    # --- Tokenize ---
+    from pystylometry.tokenizer import Tokenizer
+
+    tokenizer = Tokenizer(**tokenizer_kwargs)
+
+    if args.stats:
+        stats = tokenizer.get_statistics(text)
+        print(f" {bold('RESULTS')}", file=banner)
+        print(f" {dim('─' * 71)}", file=banner)
+        print(f" Total tokens: {green(f'{stats.total_tokens:,}')}", file=banner)
+        print(f" Unique tokens: {green(f'{stats.unique_tokens:,}')}", file=banner)
+        print(f" Word tokens: {stats.word_tokens:,}", file=banner)
+        print(f" Number tokens: {stats.number_tokens:,}", file=banner)
+        print(f" Punctuation: {stats.punctuation_tokens:,}", file=banner)
+        print(f" URLs: {stats.url_tokens:,}", file=banner)
+        print(f" Emails: {stats.email_tokens:,}", file=banner)
+        print(f" Hashtags: {stats.hashtag_tokens:,}", file=banner)
+        print(f" Mentions: {stats.mention_tokens:,}", file=banner)
+        print(f" Avg length: {stats.average_token_length:.1f}", file=banner)
+        print(f" Min length: {stats.min_token_length}", file=banner)
+        print(f" Max length: {stats.max_token_length}", file=banner)
+        print(file=banner)
+
+        if args.json:
+            import dataclasses
+
+            print(json.dumps(dataclasses.asdict(stats), indent=2))
+
+    elif args.metadata or (args.json and args.metadata):
+        metadata_list = tokenizer.tokenize_with_metadata(text)
+        count = len(metadata_list)
+        print(
+            f" {yellow('Tokenizing...')} {green(f'{count:,}')} tokens extracted",
+            file=banner,
+        )
+        print(file=banner)
+        output = [
+            {
+                "token": m.token,
+                "start": m.start,
+                "end": m.end,
+                "type": m.token_type,
+            }
+            for m in metadata_list
+        ]
+        print(json.dumps(output, indent=2))
+
+    elif args.json:
+        tokens = tokenizer.tokenize(text)
+        count = len(tokens)
+        print(
+            f" {yellow('Tokenizing...')} {green(f'{count:,}')} tokens extracted",
+            file=banner,
+        )
+        print(file=banner)
+        print(json.dumps(tokens, indent=2))
+
+    else:
+        tokens = tokenizer.tokenize(text)
+        count = len(tokens)
+        print(
+            f" {yellow('Tokenizing...')} {green(f'{count:,}')} tokens extracted",
+            file=banner,
+        )
+        print(file=banner)
+        for token in tokens:
+            print(token)
+
+
 if __name__ == "__main__":
     drift_cli()
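The --stats path of the new command is a thin wrapper around Tokenizer.get_statistics. A minimal programmatic equivalent, using only calls and attribute names that appear in this diff (the file name is hypothetical):

    from pathlib import Path
    from pystylometry.tokenizer import Tokenizer

    text = Path("manuscript.txt").read_text(encoding="utf-8")

    # lowercase and strip_punctuation mirror the CLI defaults; other
    # Tokenizer options keep their own defaults here
    tokenizer = Tokenizer(lowercase=True, strip_punctuation=True)

    stats = tokenizer.get_statistics(text)
    print(stats.total_tokens, stats.unique_tokens, stats.word_tokens)
    print(f"avg token length: {stats.average_token_length:.1f}")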
pystylometry/lexical/__init__.py CHANGED
@@ -6,12 +6,13 @@ from .function_words import compute_function_words
 from .hapax import compute_hapax_ratios, compute_hapax_with_lexicon_analysis
 from .mtld import compute_mtld
 from .repetition import compute_repetitive_ngrams, compute_repetitive_unigrams
-from .ttr import compute_ttr
+from .ttr import TTRAggregator, compute_ttr
 from .word_frequency_sophistication import compute_word_frequency_sophistication
 from .yule import compute_yule

 __all__ = [
     "compute_ttr",
+    "TTRAggregator",
     "compute_mtld",
     "compute_yule",
     "compute_hapax_ratios",
pystylometry/lexical/ttr.py CHANGED
@@ -1,149 +1,340 @@
-"""Type-Token Ratio (TTR) analysis using stylometry-ttr package.
+"""Type-Token Ratio (TTR) analysis with native chunked computation.

-This module provides a facade wrapper around the stylometry-ttr package,
-maintaining consistent API patterns with other pystylometry metrics.
+Computes multiple TTR variants for measuring lexical diversity (vocabulary
+richness). All metrics are computed per-chunk and wrapped in Distribution
+objects for stylometric fingerprinting.

-Related GitHub Issue:
+Previously delegated to the external ``stylometry-ttr`` package; now
+computed inline using only the Python standard library (``math`` and
+``statistics``).
+
+Related GitHub Issues:
     #27 - Native chunked analysis with Distribution dataclass
     https://github.com/craigtrim/pystylometry/issues/27
+
+    #43 - Inline stylometry-ttr into pystylometry (remove external dependency)
+    https://github.com/craigtrim/pystylometry/issues/43
+
+References:
+    Guiraud, P. (1960). Problèmes et méthodes de la statistique linguistique.
+    Herdan, G. (1960). Type-token Mathematics: A Textbook of Mathematical
+        Linguistics. Mouton.
+    Johnson, W. (1944). Studies in language behavior: I. A program of research.
+        Psychological Monographs, 56(2), 1-15.
 """

 from __future__ import annotations

-from .._types import Distribution, TTRResult, make_distribution
+import math
+import statistics
+from typing import Optional
+
+from .._types import Distribution, TTRAggregateResult, TTRResult, make_distribution
+from ..tokenizer import Tokenizer
+
+# ---------------------------------------------------------------------------
+# Internal helpers
+# ---------------------------------------------------------------------------
+
+# Minimum words required before STTR computation is meaningful.
+# With fewer words we cannot form at least two full chunks, so the
+# standardised metric would be unreliable.
+_MIN_WORDS_FOR_STTR = 2000
+
+
+def _compute_chunk_ttrs(tokens: list[str], chunk_size: int) -> list[float]:
+    """Compute per-chunk raw TTR values for non-overlapping chunks.
+
+    Only full-sized chunks are included so that every TTR is measured on the
+    same token count, keeping the standardised metric unbiased.
+
+    Args:
+        tokens: Full token list.
+        chunk_size: Number of tokens per chunk.
+
+    Returns:
+        List of per-chunk TTR values (may be empty if too few tokens).
+    """
+    total = len(tokens)
+    chunk_ttrs: list[float] = []
+    for i in range(0, total - chunk_size + 1, chunk_size):
+        chunk = tokens[i : i + chunk_size]
+        chunk_ttrs.append(len(set(chunk)) / chunk_size)
+    return chunk_ttrs
+

+def _compute_deltas(
+    chunk_ttrs: list[float],
+) -> tuple[Optional[float], Optional[float], Optional[float], Optional[float]]:
+    """Compute delta metrics: TTR(n) - TTR(n-1) for consecutive chunks.

-def compute_ttr(text: str, text_id: str | None = None, chunk_size: int = 1000) -> TTRResult:
+    Delta metrics capture chunk-to-chunk vocabulary variability:
+        - delta_mean: average change (positive = expanding vocabulary)
+        - delta_std: volatility of change (stylometric fingerprint)
+        - delta_min: largest negative swing
+        - delta_max: largest positive swing
+
+    Args:
+        chunk_ttrs: Per-chunk TTR values (needs >= 2 values).
+
+    Returns:
+        Tuple of (delta_mean, delta_std, delta_min, delta_max).
+        All ``None`` when fewer than 2 chunks are available.
     """
-    Compute Type-Token Ratio (TTR) metrics for vocabulary richness.
+    if len(chunk_ttrs) < 2:
+        return None, None, None, None
+
+    deltas = [chunk_ttrs[i] - chunk_ttrs[i - 1] for i in range(1, len(chunk_ttrs))]
+    d_mean = statistics.mean(deltas)
+    d_std = statistics.stdev(deltas) if len(deltas) > 1 else 0.0
+    return d_mean, d_std, min(deltas), max(deltas)
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+

-    This is a facade wrapper around the stylometry-ttr package that provides
-    multiple TTR variants for measuring lexical diversity. TTR measures the
-    ratio of unique words (types) to total words (tokens).
+def compute_ttr(
+    text: str,
+    text_id: str | None = None,
+    chunk_size: int = 1000,
+) -> TTRResult:
+    """Compute Type-Token Ratio (TTR) metrics for vocabulary richness.
+
+    Tokenises the input with pystylometry's ``Tokenizer`` (lowercase, words
+    only), then computes five TTR-family metrics. Each metric is computed
+    per-chunk and the full per-chunk distribution is exposed via a
+    ``Distribution`` object for stylometric fingerprinting.

     Metrics computed:
-        - Raw TTR: unique_words / total_words
-        - Root TTR (Guiraud's index): unique_words / sqrt(total_words)
-        - Log TTR (Herdan's C): log(unique_words) / log(total_words)
-        - STTR: Standardized TTR across fixed-size chunks (reduces length bias)
-        - Delta Std: Standard deviation of TTR across chunks (vocabulary consistency)
+        - **Raw TTR**: ``unique_words / total_words``
+        - **Root TTR** (Guiraud's index): ``unique_words / sqrt(total_words)``
+        - **Log TTR** (Herdan's C): ``log(unique_words) / log(total_words)``
+        - **STTR**: Mean TTR across fixed-size chunks (reduces length bias).
+          Only computed when the text has >= 2000 words.
+        - **Delta Std**: Std-dev of chunk-to-chunk TTR change (vocabulary
+          consistency). Only computed when >= 2 chunks are available.

-    Related GitHub Issue:
+    Related GitHub Issues:
         #27 - Native chunked analysis with Distribution dataclass
         https://github.com/craigtrim/pystylometry/issues/27

+        #43 - Inline stylometry-ttr into pystylometry
+        https://github.com/craigtrim/pystylometry/issues/43
+
     References:
-        Guiraud, P. (1960). Problèmes et méthodes de la statistique linguistique.
+        Guiraud, P. (1960). Problèmes et méthodes de la statistique
+            linguistique.
         Herdan, G. (1960). Type-token Mathematics: A Textbook of Mathematical
             Linguistics. Mouton.
-        Johnson, W. (1944). Studies in language behavior: I. A program of research.
-            Psychological Monographs, 56(2), 1-15.
+        Johnson, W. (1944). Studies in language behavior: I. A program of
+            research. Psychological Monographs, 56(2), 1-15.

     Args:
-        text: Input text to analyze
-        text_id: Optional identifier for the text (for tracking purposes)
-        chunk_size: Number of words per chunk (default: 1000).
-            Note: The stylometry-ttr package handles its own internal chunking,
-            so this parameter is included for API consistency but actual chunking
-            behavior is delegated to stylometry-ttr.
+        text: Input text to analyse.
+        text_id: Optional identifier for the text (stored in metadata).
+        chunk_size: Number of words per chunk for STTR and per-chunk
+            distributions (default: 1000).

     Returns:
-        TTRResult with all TTR variants and metadata, including Distribution
-        objects for stylometric fingerprinting.
+        TTRResult with all TTR variants, Distribution objects, and metadata.

     Example:
         >>> result = compute_ttr("The quick brown fox jumps over the lazy dog.")
         >>> print(f"Raw TTR: {result.ttr:.3f}")
-        Raw TTR: 0.900
+        Raw TTR: 1.000
         >>> print(f"Root TTR: {result.root_ttr:.3f}")
-        Root TTR: 2.846
-        >>> print(f"STTR: {result.sttr:.3f}")
-        STTR: 1.000
+        Root TTR: 3.000

         >>> # With text identifier
         >>> result = compute_ttr("Sample text here.", text_id="sample-001")
         >>> print(result.metadata["text_id"])
         sample-001
     """
-    try:
-        from stylometry_ttr import compute_ttr as _compute_ttr
-    except ImportError as e:
-        raise ImportError(
-            "TTR metrics require the stylometry-ttr package. "
-            "This should have been installed as a core dependency. "
-            "Install with: pip install stylometry-ttr"
-        ) from e
-
-    # Call the stylometry-ttr compute_ttr function
-    # Note: stylometry-ttr requires text_id to be a string, not None
-    ttr_result = _compute_ttr(text, text_id=text_id or "")
-
-    # Extract values, handling None for short texts
-    ttr_val = ttr_result.ttr
-    root_ttr_val = ttr_result.root_ttr
-    log_ttr_val = ttr_result.log_ttr
-    sttr_val = ttr_result.sttr if ttr_result.sttr is not None else 0.0
-    delta_std_val = ttr_result.delta_std if ttr_result.delta_std is not None else 0.0
-
-    # Create single-value distributions from stylometry-ttr results
-    # The stylometry-ttr package handles its own internal chunking for STTR
-    # so we wrap the aggregate results in Distribution objects
-    ttr_dist = (
-        make_distribution([ttr_val])
-        if ttr_val is not None
-        else Distribution(
-            values=[], mean=float("nan"), median=float("nan"), std=0.0, range=0.0, iqr=0.0
+    # Tokenise using pystylometry's own tokenizer (lowercase, words only)
+    tokenizer = Tokenizer(lowercase=True, strip_punctuation=True)
+    tokens = tokenizer.tokenize(text)
+
+    total_words = len(tokens)
+
+    # --- empty / trivial text --------------------------------------------------
+    if total_words == 0:
+        empty_dist = Distribution(
+            values=[],
+            mean=float("nan"),
+            median=float("nan"),
+            std=0.0,
+            range=0.0,
+            iqr=0.0,
         )
-    )
-    root_ttr_dist = (
-        make_distribution([root_ttr_val])
-        if root_ttr_val is not None
-        else Distribution(
-            values=[], mean=float("nan"), median=float("nan"), std=0.0, range=0.0, iqr=0.0
+        return TTRResult(
+            total_words=0,
+            unique_words=0,
+            ttr=0.0,
+            root_ttr=0.0,
+            log_ttr=0.0,
+            sttr=0.0,
+            delta_std=0.0,
+            ttr_dist=empty_dist,
+            root_ttr_dist=empty_dist,
+            log_ttr_dist=empty_dist,
+            sttr_dist=empty_dist,
+            delta_std_dist=empty_dist,
+            chunk_size=chunk_size,
+            chunk_count=0,
+            metadata={
+                "text_id": text_id or "",
+                "sttr_available": False,
+                "delta_std_available": False,
+            },
         )
-    )
-    log_ttr_dist = (
-        make_distribution([log_ttr_val])
-        if log_ttr_val is not None
-        else Distribution(
-            values=[], mean=float("nan"), median=float("nan"), std=0.0, range=0.0, iqr=0.0
-        )
-    )
-    sttr_dist = (
-        make_distribution([sttr_val])
-        if ttr_result.sttr is not None
-        else Distribution(
-            values=[], mean=float("nan"), median=float("nan"), std=0.0, range=0.0, iqr=0.0
+
+    # --- global metrics --------------------------------------------------------
+    unique_words = len(set(tokens))
+    ttr_val = unique_words / total_words
+    root_ttr_val = unique_words / math.sqrt(total_words)
+    log_ttr_val = math.log(unique_words) / math.log(total_words) if total_words > 1 else 0.0
+
+    # --- per-chunk metrics -----------------------------------------------------
+    chunk_ttrs = _compute_chunk_ttrs(tokens, chunk_size)
+    chunk_count = len(chunk_ttrs)
+
+    # STTR: mean TTR across chunks (only meaningful with enough text)
+    sttr_available = total_words >= _MIN_WORDS_FOR_STTR and chunk_count >= 1
+    if sttr_available:
+        sttr_val = statistics.mean(chunk_ttrs)
+    else:
+        sttr_val = 0.0
+
+    # Delta metrics
+    delta_mean, delta_std_val, delta_min, delta_max = _compute_deltas(chunk_ttrs)
+    delta_std_available = delta_std_val is not None
+    if delta_std_val is None:
+        delta_std_val = 0.0
+
+    # --- build Distribution objects --------------------------------------------
+    # For per-chunk distributions: compute root_ttr and log_ttr per chunk as well
+    if chunk_count >= 1:
+        ttr_dist = make_distribution(chunk_ttrs)
+
+        # Root TTR per chunk: for each chunk of chunk_size tokens,
+        # root_ttr = unique / sqrt(chunk_size)
+        root_ttr_chunks = [
+            len(set(tokens[i : i + chunk_size])) / math.sqrt(chunk_size)
+            for i in range(0, total_words - chunk_size + 1, chunk_size)
+        ]
+        root_ttr_dist = make_distribution(root_ttr_chunks)
+
+        # Log TTR per chunk
+        log_ttr_chunks = []
+        for i in range(0, total_words - chunk_size + 1, chunk_size):
+            chunk = tokens[i : i + chunk_size]
+            u = len(set(chunk))
+            t = len(chunk)
+            val = math.log(u) / math.log(t) if t > 1 else 0.0
+            log_ttr_chunks.append(val)
+        log_ttr_dist = make_distribution(log_ttr_chunks)
+
+        sttr_dist = (
+            make_distribution(chunk_ttrs) if sttr_available else make_distribution([sttr_val])
         )
-    )
-    delta_std_dist = (
-        make_distribution([delta_std_val])
-        if ttr_result.delta_std is not None
-        else Distribution(
-            values=[], mean=float("nan"), median=float("nan"), std=0.0, range=0.0, iqr=0.0
+        delta_std_dist = (
+            make_distribution([delta_std_val]) if delta_std_available else make_distribution([0.0])
         )
-    )
+    else:
+        # Not enough text for any chunks — wrap globals in single-value dists
+        ttr_dist = make_distribution([ttr_val])
+        root_ttr_dist = make_distribution([root_ttr_val])
+        log_ttr_dist = make_distribution([log_ttr_val])
+        sttr_dist = make_distribution([sttr_val])
+        delta_std_dist = make_distribution([0.0])

-    # Convert to our TTRResult dataclass
     return TTRResult(
-        total_words=ttr_result.total_words,
-        unique_words=ttr_result.unique_words,
-        ttr=ttr_val if ttr_val is not None else float("nan"),
-        root_ttr=root_ttr_val if root_ttr_val is not None else float("nan"),
-        log_ttr=log_ttr_val if log_ttr_val is not None else float("nan"),
-        sttr=sttr_val,
-        delta_std=delta_std_val,
+        total_words=total_words,
+        unique_words=unique_words,
+        ttr=round(ttr_val, 6),
+        root_ttr=round(root_ttr_val, 4),
+        log_ttr=round(log_ttr_val, 6),
+        sttr=round(sttr_val, 6),
+        delta_std=round(delta_std_val, 6),
         ttr_dist=ttr_dist,
         root_ttr_dist=root_ttr_dist,
         log_ttr_dist=log_ttr_dist,
         sttr_dist=sttr_dist,
         delta_std_dist=delta_std_dist,
         chunk_size=chunk_size,
-        chunk_count=1,  # stylometry-ttr returns aggregate results
+        chunk_count=chunk_count if chunk_count >= 1 else 1,
         metadata={
             "text_id": text_id or "",
-            "source": "stylometry-ttr",
-            "sttr_available": ttr_result.sttr is not None,
-            "delta_std_available": ttr_result.delta_std is not None,
+            "sttr_available": sttr_available,
+            "delta_std_available": delta_std_available,
         },
     )
+
+
+# ---------------------------------------------------------------------------
+# Aggregation
+# ---------------------------------------------------------------------------
+
+
+class TTRAggregator:
+    """Aggregate per-text TTR results into group-level statistics.
+
+    Useful for comparing vocabulary richness across authors, genres, or
+    time periods by computing summary statistics (mean, std, min, max,
+    median) over a collection of ``TTRResult`` objects.
+
+    Related GitHub Issue:
+        #43 - Inline stylometry-ttr into pystylometry
+        https://github.com/craigtrim/pystylometry/issues/43
+
+    Example:
+        >>> from pystylometry.lexical import compute_ttr, TTRAggregator
+        >>> results = [compute_ttr(t) for t in texts]
+        >>> agg = TTRAggregator()
+        >>> stats = agg.aggregate(results, group_id="Shakespeare")
+        >>> print(stats.ttr_mean)
+        0.412
+    """
+
+    def aggregate(self, results: list[TTRResult], group_id: str) -> TTRAggregateResult:
+        """Compute aggregate statistics from multiple TTR results.
+
+        Args:
+            results: List of per-text ``TTRResult`` objects.
+            group_id: Identifier for the group (e.g. author name).
+
+        Returns:
+            ``TTRAggregateResult`` with group-level statistics.
+
+        Raises:
+            ValueError: If *results* is empty.
+        """
+        if not results:
+            raise ValueError("Cannot aggregate empty results list")
+
+        ttrs = [r.ttr for r in results]
+        root_ttrs = [r.root_ttr for r in results]
+        log_ttrs = [r.log_ttr for r in results]
+        sttrs = [r.sttr for r in results if r.metadata.get("sttr_available")]
+        delta_stds = [r.delta_std for r in results if r.metadata.get("delta_std_available")]
+
+        return TTRAggregateResult(
+            group_id=group_id,
+            text_count=len(results),
+            total_words=sum(r.total_words for r in results),
+            ttr_mean=round(statistics.mean(ttrs), 6),
+            ttr_std=round(statistics.stdev(ttrs), 6) if len(ttrs) > 1 else 0.0,
+            ttr_min=round(min(ttrs), 6),
+            ttr_max=round(max(ttrs), 6),
+            ttr_median=round(statistics.median(ttrs), 6),
+            root_ttr_mean=round(statistics.mean(root_ttrs), 4),
+            root_ttr_std=round(statistics.stdev(root_ttrs), 4) if len(root_ttrs) > 1 else 0.0,
+            log_ttr_mean=round(statistics.mean(log_ttrs), 6),
+            log_ttr_std=round(statistics.stdev(log_ttrs), 6) if len(log_ttrs) > 1 else 0.0,
+            sttr_mean=round(statistics.mean(sttrs), 6) if sttrs else None,
+            sttr_std=round(statistics.stdev(sttrs), 6) if len(sttrs) > 1 else None,
+            delta_std_mean=round(statistics.mean(delta_stds), 6) if delta_stds else None,
+            metadata={"group_id": group_id, "text_count": len(results)},
+        )
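To make the chunked STTR and delta behaviour above concrete, a toy walkthrough with a deliberately small chunk_size (real input needs at least 2000 words before sttr_available is set):

    # chunk_size=5 over 10 tokens -> two full, non-overlapping chunks
    tokens = ["a", "b", "a", "c", "d", "a", "a", "b", "e", "f"]
    # chunk 1 = tokens[0:5]  -> 4 unique of 5 -> TTR 0.8
    # chunk 2 = tokens[5:10] -> 4 unique of 5 -> TTR 0.8
    chunk_ttrs = [0.8, 0.8]

    # _compute_deltas: deltas = [0.8 - 0.8] = [0.0]
    # delta_mean = 0.0; delta_std = 0.0 (a single delta falls back to 0.0)
    # STTR would be mean(chunk_ttrs) = 0.8 once the 2000-word gate is met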
pystylometry-1.3.5.dist-info/METADATA CHANGED
@@ -1,13 +1,13 @@
 Metadata-Version: 2.1
 Name: pystylometry
-Version: 1.3.1
+Version: 1.3.5
 Summary: Comprehensive Python package for stylometric analysis
 License: MIT
 Keywords: stylometry,nlp,text-analysis,authorship,readability,lexical-diversity,readability-metrics
 Author: Craig Trim
 Author-email: craigtrim@gmail.com
 Requires-Python: >=3.9,<4.0
-Classifier: Development Status :: 4 - Beta
+Classifier: Development Status :: 5 - Production/Stable
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Science/Research
 Classifier: License :: OSI Approved :: MIT License
@@ -19,7 +19,6 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Scientific/Engineering :: Information Analysis
 Classifier: Topic :: Text Processing :: Linguistic
 Classifier: Typing :: Typed
-Requires-Dist: stylometry-ttr (>=1.0.3,<2.0.0)
 Project-URL: Homepage, https://github.com/craigtrim/pystylometry
 Project-URL: Issues, https://github.com/craigtrim/pystylometry/issues
 Project-URL: Repository, https://github.com/craigtrim/pystylometry
pystylometry-1.3.5.dist-info/RECORD CHANGED
@@ -1,7 +1,7 @@
 pystylometry/README.md,sha256=WFOtCAF3qtDTgGG3a_jTjNSwVgpQEXI1PKqbVBfyo1M,2366
-pystylometry/__init__.py,sha256=Z6zkHlX05SUeObDca9dL1Gkfq4UPBWbU2M4sp4fVj78,9220
+pystylometry/__init__.py,sha256=bZ8xk66Mx7gj3K_I6594DoqATIGv1FtLFSJmF6Dz1g4,10462
 pystylometry/_normalize.py,sha256=7tdfgAKg5CI2d4eoDypmFqOVByoxpwgUUZD6vyBH86A,8679
-pystylometry/_types.py,sha256=g6XzwCHeMAIBfexId6Pd9EQfJzvZ0KYMfD4kpS5T7BQ,82284
+pystylometry/_types.py,sha256=_YCkVyvHulmKkvmjzb73dcCOWJwiJZVhkV7sJcMr4YY,83618
 pystylometry/_utils.py,sha256=CXTx4KDJ_6iiHcc2OXqOYs-izhLf_ZEmJFKdHyd7q34,5282
 pystylometry/authorship/README.md,sha256=zNXCpLj7nczPnYykJnCUw3y-kxfC9mWZmngi3nfw6us,1016
 pystylometry/authorship/__init__.py,sha256=D7m38hWi_62o1ZDSrghLCfob9YsykTht4K37wiVgHfg,1530
@@ -13,7 +13,7 @@ pystylometry/authorship/zeta.py,sha256=oOi9Y6ZPq15ILLVl6So9O9ERvzig26en6_dpQJWeo
 pystylometry/character/README.md,sha256=poQwhbI8MabVD_626CWjEL87IOX5YDGS0ZJTH1hNwEE,607
 pystylometry/character/__init__.py,sha256=CiiKJmZ10UJE8qAecavpOKyw-vGonsOew_mFH34ZOC0,371
 pystylometry/character/character_metrics.py,sha256=OCIGP_ivtwtzcifcxcbmp2R5SIKh2tKyvKcHAv64S8g,14029
-pystylometry/cli.py,sha256=z0yx2O_E05tHT9_BHgSaQ2zq5_fBERXfhbYHcuQ2y-A,15477
+pystylometry/cli.py,sha256=HvzBZxFSiS5AAXCb6N9Eo3QonkH-ucRFp6xDF1kJTQ0,26380
 pystylometry/consistency/README.md,sha256=HG_Rd6WRBnIz3M7J11dVDv1S2ARkMABFYrTn-VV8xRY,1058
 pystylometry/consistency/__init__.py,sha256=l7nzpS7M4yHDBbM2LGAtW0XGT2n7YjSey_1xKf45224,2181
 pystylometry/consistency/_thresholds.py,sha256=5fZwdJ_cnDy0ED7CCYs6V_zP6kIAR1p0h0NYkbZ0HRg,6381
@@ -24,13 +24,13 @@ pystylometry/dialect/_data/dialect_markers.json,sha256=DthluOA6q0rG_8IrCrFIYWh_E
 pystylometry/dialect/_loader.py,sha256=M2ATp-5754v_yX9EWvBP0r5qgNf8xlL8XadVsVb_Hco,12989
 pystylometry/dialect/detector.py,sha256=9x0ZuIfTIjsmdNSx0Ezy5AC0SAFtC4kVw11iOSBd9gQ,20147
 pystylometry/lexical/README.md,sha256=cFQ7KRZV4ubsQwIlOH3YHTbhhNl5X91Sr3zcn-3x0HI,1185
-pystylometry/lexical/__init__.py,sha256=_VpemdfVYZYXHP4ulTItoyegJ-3lE85wlfzDCpseaNE,898
+pystylometry/lexical/__init__.py,sha256=ib_F-NGVydLNGT_HgaWurBT25AadTE4eNcAN1lGMKmQ,934
 pystylometry/lexical/advanced_diversity.py,sha256=rL1hlNqTnaEFcA2v4oBJlojHZMTqdvvm4jYXTFGVpYE,25664
 pystylometry/lexical/function_words.py,sha256=eel9bq_qWgWlvG0NtDiouilMt9kaFqz2rh3add2UC4U,17832
 pystylometry/lexical/hapax.py,sha256=djTqZyZIYXa3GRiPoy6TTGHPm0wCRNJ9U0Rwnf5NoDk,12173
 pystylometry/lexical/mtld.py,sha256=XpeCF8sOXZhWbaazHGuqm08mrOf_DYfkfGGAltWnyy4,7101
 pystylometry/lexical/repetition.py,sha256=A9L0oNwfnCepVkWy57kjHV47Pw4M6fZXEl25hBVdq2s,18318
-pystylometry/lexical/ttr.py,sha256=iEsXkoSPyZEyiiFwKatKA8KhLRukD7RDRvyRkRQOTsk,5848
+pystylometry/lexical/ttr.py,sha256=igS8gnvIv57zvjQPtmIgkB5Wj7jdaKSMRpJ1WvMfKtw,13091
 pystylometry/lexical/word_frequency_sophistication.py,sha256=OHOS0fBvd1Bz8zsJk-pJbWLTgImmBd-aewQnp_kq8BY,38828
 pystylometry/lexical/yule.py,sha256=NXggha8jmQCu4i-qKZpISwyJBqNpuPHyVR86BLDLgio,5192
 pystylometry/ngrams/README.md,sha256=50wyaWcLGbosLzTPR1cXdE_xAVU8jVY7fd3ReEk9KnY,802
@@ -71,8 +71,8 @@ pystylometry/viz/jsx/_base.py,sha256=nd7kEc13fUcRMom3A5jqjGyTy-djIeydq2k3oPHZIHY
 pystylometry/viz/jsx/report.py,sha256=DbbHnnNAEi5tmVg4PmiHb17vkBBXujyE4x1CfVBiOBw,25857
 pystylometry/viz/jsx/timeline.py,sha256=hor-xnBa6oVkSqN0AEZUCQFBOB-iTfHSFZHiEfeakPA,30716
 pystylometry/viz/jsx/viewer.py,sha256=3LO49d_2bRf_P-P-2oSKpKx4N8Ugo4oCLb3DtvyNxXI,43716
-pystylometry-1.3.1.dist-info/LICENSE,sha256=vou5JCLAT5nHcsUv-AkjUYAihYfN9mwPDXxV2DHyHBo,1067
-pystylometry-1.3.1.dist-info/METADATA,sha256=Nn-0-ABq9tykuxWpC79GkhHO71oWLnAseh0z9R3mycs,4813
-pystylometry-1.3.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-pystylometry-1.3.1.dist-info/entry_points.txt,sha256=iHOaFXlyiwcQM1LlID2gWGmN4DBLdTSpKGjttU8tgm8,113
-pystylometry-1.3.1.dist-info/RECORD,,
+pystylometry-1.3.5.dist-info/LICENSE,sha256=vou5JCLAT5nHcsUv-AkjUYAihYfN9mwPDXxV2DHyHBo,1067
+pystylometry-1.3.5.dist-info/METADATA,sha256=DEFzNf_ZQd3mulwOnFMRSyc1s30alGM6UtO-L7nloPc,4779
+pystylometry-1.3.5.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+pystylometry-1.3.5.dist-info/entry_points.txt,sha256=XsJvKgKs3LRDuzdF45JO7ZnS0wGKg1f5qsxVYSZzLp8,165
+pystylometry-1.3.5.dist-info/RECORD,,
pystylometry-1.3.5.dist-info/entry_points.txt CHANGED
@@ -1,4 +1,5 @@
 [console_scripts]
 pystylometry-drift=pystylometry.cli:drift_cli
+pystylometry-tokenize=pystylometry.cli:tokenize_cli
 pystylometry-viewer=pystylometry.cli:viewer_cli