PyPI - slithyt - Versions diffs - 1.0.0__py3-none-any.whl - Mend

slithyt 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

slithyt/__init__.py +0 -0
slithyt/build.py +61 -0
slithyt/cli.py +153 -0
slithyt/data/__init__.py +0 -0
slithyt/generator.py +94 -0
slithyt/pronounce.py +60 -0
slithyt/rhyme.py +87 -0
slithyt/sentiment.py +119 -0
slithyt/utils.py +21 -0
slithyt/validator.py +58 -0
slithyt-1.0.0.dist-info/METADATA +103 -0
slithyt-1.0.0.dist-info/RECORD +16 -0
slithyt-1.0.0.dist-info/WHEEL +5 -0
slithyt-1.0.0.dist-info/entry_points.txt +2 -0
slithyt-1.0.0.dist-info/licenses/LICENSE +7 -0
slithyt-1.0.0.dist-info/top_level.txt +1 -0

slithyt/__init__.py ADDED Viewed

File without changes

slithyt/build.py ADDED Viewed

@@ -0,0 +1,61 @@
+# src/slithyt/build.py
+import pickle
+import pathlib
+from collections import defaultdict
+from . import utils
+import pronouncing
def build_phonetic_model(corpus_path: str, n: int = 3) -> dict:
    """Build a phonetic n-gram model from a word corpus.

    Every non-empty corpus line is treated as one word. The word is stripped,
    lower-cased and looked up with ``pronouncing.phones_for_word``; words
    without a known pronunciation are skipped, and only the first listed
    pronunciation is used.

    Args:
        corpus_path: Path to the corpus file (one word per line), opened
            through ``utils.open_any``.
        n: The order of the n-gram model (e.g., 3 for trigrams).

    Returns:
        A dict mapping each prefix (a tuple of ``n - 1`` phonemes, padded
        with ``"^"`` at the start of a word) to the list of phonemes seen
        after it. ``"$"`` marks the end of a word. Repeated observations are
        kept in the list.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be at least 1")
    model = defaultdict(list)
    prefix_len = n - 1
    with utils.open_any(corpus_path) as f:
        for line_no, line in enumerate(f, start=1):
            if line_no % 20000 == 0:
                print(f"  ...processed {line_no} words for phonetic model...")
            word = line.strip().lower()
            if not word:
                continue
            phones_list = pronouncing.phones_for_word(word)
            if not phones_list:
                continue
            phonemes = phones_list[0].split()
            padded = ["^"] * prefix_len + phonemes + ["$"]
            # A separate index name keeps the line counter above intact.
            for start in range(len(padded) - prefix_len):
                prefix = tuple(padded[start:start + prefix_len])
                model[prefix].append(padded[start + prefix_len])
    return dict(model)
def build_transcription_model(corpus_path: str) -> dict:
    """Build a statistical model for transcribing phonemes to graphemes.

    Only words whose first pronunciation has exactly as many phonemes as the
    word has letters contribute: phoneme and letter are then paired position
    by position and counted. Stress digits (0/1/2) are stripped from the
    phoneme before counting.

    Args:
        corpus_path: Path to the corpus file (one word per line), opened
            through ``utils.open_any``.

    Returns:
        A dict mapping each stress-free phoneme to a list of at most three
        letters, most frequently paired letter first.
    """
    counts = defaultdict(lambda: defaultdict(int))
    with utils.open_any(corpus_path) as f:
        for line_no, line in enumerate(f, start=1):
            if line_no % 20000 == 0:
                print(f"  ...processed {line_no} words for transcription model...")
            word = line.strip().lower()
            if not word:
                continue
            phones_list = pronouncing.phones_for_word(word)
            if not phones_list:
                continue
            phonemes = phones_list[0].split()
            if len(phonemes) != len(word):
                # No one-to-one alignment between sounds and letters.
                continue
            for phoneme, letter in zip(phonemes, word):
                counts[phoneme.rstrip('012')][letter] += 1

    final_model = {}
    for phoneme, spellings in counts.items():
        ranked = sorted(spellings.items(), key=lambda item: item[1], reverse=True)
        final_model[phoneme] = [letter for letter, _ in ranked[:3]]
    return final_model

slithyt/cli.py ADDED Viewed

@@ -0,0 +1,153 @@
+# src/slithyt/cli.py
+import argparse
+import pathlib
+import pickle
+from . import generator, validator, sentiment, pronounce, rhyme, build
def _build_parser() -> argparse.ArgumentParser:
    """Create the argument parser with one sub-parser per sub-command."""
    parser = argparse.ArgumentParser(description="SlithyT: A plausible word generation tool.")
    subparsers = parser.add_subparsers(dest="command", required=True)

    # --- Generate command ---
    gen_parser = subparsers.add_parser("generate", help="Generate new words.")
    gen_parser.add_argument("--corpus", help="Path to the corpus file for training. Required unless using --rhymes-with.")
    gen_parser.add_argument("--count", type=int, default=10)
    gen_parser.add_argument("--min-len", type=int, default=5)
    gen_parser.add_argument("--max-len", type=int, default=10)
    gen_parser.add_argument("--matches-regex")
    gen_parser.add_argument("--reject-regex")
    gen_parser.add_argument("--dictionary")
    gen_parser.add_argument("--blocklist")
    gen_parser.add_argument("--ngram-size", type=int, default=3)
    gen_parser.add_argument("--min-sentiment", type=float)
    gen_parser.add_argument("--max-sentiment", type=float)
    gen_parser.add_argument("--min-pronounceability", type=float)
    gen_parser.add_argument("--rhymes-with")
    gen_parser.add_argument("--allow-corpus-words", action="store_true")

    # --- Validate command ---
    val_parser = subparsers.add_parser("validate", help="Validate a potential word.")
    val_parser.add_argument("word")
    val_parser.add_argument("--dictionary")
    val_parser.add_argument("--blocklist")

    # --- Rhyme command ---
    rhyme_parser = subparsers.add_parser("rhyme", help="Get phonetic info for a word.")
    rhyme_parser.add_argument("word")

    # --- Build Cache command ---
    build_parser = subparsers.add_parser("build-cache", help="Build the phonetic and transcription models.")
    build_parser.add_argument("--corpus", help="Path to a custom corpus to build models from.")
    return parser


def _run_build_cache(args) -> None:
    """Build both models and pickle them into ~/.slithyt/data."""
    default_dict_path = pathlib.Path(__file__).parent / 'data' / 'cmu.txt.gz'
    corpus_to_use = args.corpus if args.corpus else str(default_dict_path)
    cache_dir = pathlib.Path.home() / '.slithyt' / 'data'
    cache_dir.mkdir(parents=True, exist_ok=True)

    phonetic_path = cache_dir / 'phonetic-model.dat'
    phonetic_model = build.build_phonetic_model(corpus_to_use)
    with open(phonetic_path, "wb") as f:
        pickle.dump(phonetic_model, f)
    print(f"Phonetic model saved to {phonetic_path}")

    transcription_path = cache_dir / 'transcription-model.dat'
    transcription_model = build.build_transcription_model(corpus_to_use)
    with open(transcription_path, "wb") as f:
        pickle.dump(transcription_model, f)
    print(f"Transcription model saved to {transcription_path}")


def _load_filter_sets(args):
    """Load the word sets used to reject words; returns (dictionary_set, blocklist_set)."""
    data_dir = pathlib.Path(__file__).parent / 'data'
    block_to_load = args.blocklist if args.blocklist is not None else data_dir / 'en-block.txt.gz'
    blocklist_set = validator.load_word_set(str(block_to_load))

    dict_to_load = args.dictionary if args.dictionary is not None else data_dir / 'cmu.txt.gz'
    # Only the "generate" sub-command defines --corpus.
    corpus = getattr(args, "corpus", None)
    # The dictionary is left empty when it is the very file used as corpus
    # (presumably to avoid loading the same word list twice).
    if args.command == "generate" and corpus and str(dict_to_load) == corpus:
        dictionary_set = set()
    else:
        dictionary_set = validator.load_word_set(str(dict_to_load))
    return dictionary_set, blocklist_set


def _collect_words(count: int, max_attempts: int, propose, is_acceptable) -> list:
    """Print and return up to ``count`` distinct accepted words from ``propose``."""
    found = []
    for _ in range(max_attempts):
        if len(found) >= count:
            break
        word = propose()
        if word and word not in found and is_acceptable(word):
            found.append(word)
            print(f"  - {word}")
    if len(found) < count:
        # Make it visible that the constraints were too tight for the request.
        print(f"WARNING: Only generated {len(found)} of {count} requested words.")
    return found


def _generate_rhymes(args, dictionary_set, blocklist_set) -> None:
    """Generate words that end with the rhyme signature of --rhymes-with."""
    cache_dir = pathlib.Path.home() / '.slithyt' / 'data'
    phonetic_model = rhyme.load_phonetic_model(str(cache_dir / 'phonetic-model.dat'))
    transcription_model = rhyme.load_transcription_model(str(cache_dir / 'transcription-model.dat'))
    if not phonetic_model or not transcription_model:
        return
    target_phonemes = rhyme.get_phonetic_breakdown(args.rhymes_with)
    if not target_phonemes:
        print(f"ERROR: Cannot find '{args.rhymes_with}' in phonetic dictionary.")
        return
    signature = rhyme.get_rhyme_signature(target_phonemes)
    if not signature:
        print(f"ERROR: Cannot find a valid rhyme signature for '{args.rhymes_with}'.")
        return
    print(f"INFO: Generating words that rhyme with '{args.rhymes_with}'...")

    def propose() -> str:
        new_phonemes = rhyme.generate_phonetic_word(phonetic_model, signature)
        if not new_phonemes:
            return ""
        word = rhyme.transcribe_word(transcription_model, new_phonemes)
        # transcribe_word writes '?' for a phoneme it has no spelling for;
        # such a candidate is not a usable word.
        return "" if "?" in word else word

    def is_acceptable(word: str) -> bool:
        return validator.validate_word(
            word, args.matches_regex, args.reject_regex, dictionary_set, blocklist_set,
            None, args.min_sentiment, args.max_sentiment, args.min_pronounceability
        )

    _collect_words(args.count, args.count * 200, propose, is_acceptable)


def _generate_from_corpus(args, dictionary_set, blocklist_set) -> None:
    """Train a character n-gram model on --corpus and generate words from it."""
    print(f"INFO: Training model from '{args.corpus}'...")
    model, corpus_set = generator.train_from_corpus(args.corpus, n=args.ngram_size)
    if not model:
        return
    corpus_rejection_set = None if args.allow_corpus_words else corpus_set
    print(f"INFO: Generating {args.count} words...")

    def propose() -> str:
        return generator.generate_word(model, args.min_len, args.max_len, n=args.ngram_size)

    def is_acceptable(word: str) -> bool:
        return validator.validate_word(
            word, args.matches_regex, args.reject_regex, dictionary_set, blocklist_set,
            corpus_rejection_set, args.min_sentiment, args.max_sentiment, args.min_pronounceability
        )

    _collect_words(args.count, args.count * 100, propose, is_acceptable)


def _run_validate(args, dictionary_set, blocklist_set) -> None:
    """Print the validation result and the two scores for a single word."""
    is_valid = validator.validate_word(args.word, dictionary_set=dictionary_set, blocklist_set=blocklist_set)
    s_score = sentiment.analyze_word_sentiment(args.word)
    p_score = pronounce.score_pronounceability(args.word)
    print(f"Validating word: '{args.word}'")
    print(f"  - Validation Result:      {'Valid' if is_valid else 'Invalid'}")
    print(f"  - Sentiment Score:        {s_score:.3f}")
    print(f"  - Pronounceability Score: {p_score:.3f}")


def _run_rhyme(args) -> None:
    """Print the phonetic breakdown and rhyme signature of a known word."""
    print(f"Analyzing word: '{args.word}'")
    phonemes = rhyme.get_phonetic_breakdown(args.word)
    if not phonemes:
        print("  - Word not found in the phonetic dictionary.")
        return
    print(f"  - Phonetic Breakdown: {' '.join(phonemes)}")
    signature = rhyme.get_rhyme_signature(phonemes)
    if signature:
        print(f"  - Rhyme Signature:    {' '.join(signature)}")


def main():
    """Main function for the command-line interface.

    Parses the command line and dispatches to the ``generate``, ``validate``,
    ``rhyme`` or ``build-cache`` sub-command. Invalid argument combinations
    are reported through ``parser.error``, which exits the process.
    """
    parser = _build_parser()
    args = parser.parse_args()

    # --- Argument Validation ---
    if args.command == "generate":
        if not args.corpus and not args.rhymes_with:
            parser.error("--corpus is required unless --rhymes-with is used.")
        # The n-gram size is only used when training from a corpus.
        if not args.rhymes_with and args.ngram_size < 1:
            parser.error("--ngram-size must be at least 1.")

    # --- Command Execution ---
    if args.command == "build-cache":
        _run_build_cache(args)
    elif args.command == "generate":
        dictionary_set, blocklist_set = _load_filter_sets(args)
        if args.rhymes_with:
            _generate_rhymes(args, dictionary_set, blocklist_set)
        else:
            _generate_from_corpus(args, dictionary_set, blocklist_set)
    elif args.command == "validate":
        dictionary_set, blocklist_set = _load_filter_sets(args)
        _run_validate(args, dictionary_set, blocklist_set)
    elif args.command == "rhyme":
        _run_rhyme(args)


if __name__ == "__main__":
    main()

slithyt/data/__init__.py ADDED Viewed

File without changes

slithyt/generator.py ADDED Viewed

@@ -0,0 +1,94 @@
+# Contains the n-gram model training and word generation logic.
+import random
+from collections import defaultdict
+from . import utils
def train_from_corpus(corpus_path: str, n: int = 3) -> "tuple[dict, set]":
    """
    Reads a corpus file once to train a character-level n-gram model
    and create a set of all words in the corpus for novelty checking.

    The model is a dictionary where keys are prefixes of length (n-1)
    and values are lists of characters that can follow that prefix.
    Words are padded with "^" at the start and terminated with "$".

    Args:
        corpus_path: Path to the text file to train on (one word per line).
        n: The order of the n-gram model (e.g., 3 for trigrams).

    Returns:
        A tuple containing (model_dict, corpus_word_set). If the corpus file
        does not exist, an error is printed and ({}, set()) is returned.

    Raises:
        ValueError: If n is smaller than 1.
    """
    # The return annotation is a string so that it is not evaluated when the
    # function is defined; the subscripted builtin needs Python 3.9+.
    if n < 1:
        raise ValueError("n must be at least 1")

    model = defaultdict(list)
    corpus_word_set = set()
    prefix_len = n - 1
    try:
        with utils.open_any(corpus_path) as f:
            for line in f:
                word = line.strip().lower()
                if not word:
                    continue
                corpus_word_set.add(word)
                # Pad the word with start/end markers
                padded_word = ("^" * prefix_len) + word + "$"
                for start in range(len(padded_word) - prefix_len):
                    prefix = padded_word[start:start + prefix_len]
                    model[prefix].append(padded_word[start + prefix_len])
    except FileNotFoundError:
        print(f"ERROR: Corpus file not found at {corpus_path}")
        return {}, set()
    return dict(model), corpus_word_set
def generate_word(model: dict, min_len: int = 5, max_len: int = 10, n: int = 3) -> str:
    """
    Generates a single word using the trained n-gram model.

    Up to 100 attempts are made. Each attempt walks the model from the start
    prefix, picking a random follower at every step, and stops at the end
    marker, at an unknown prefix, or after max_len characters.

    Args:
        model: The trained n-gram model from train_from_corpus().
        min_len: The minimum length of the generated word.
        max_len: The maximum length of the generated word.
        n: The order of the n-gram model used for generation.

    Returns:
        A newly generated word as a string, or an empty string if generation fails.
    """
    if not model or min_len > max_len:
        # No model, or bounds that no word can satisfy.
        return ""

    end_char = "$"
    prefix_len = max(n - 1, 0)
    start_prefix = "^" * prefix_len

    for _attempt in range(100):  # Max attempts to prevent infinite loops
        word_chars = []
        current_prefix = start_prefix
        for _step in range(max_len):
            followers = model.get(current_prefix)
            if not followers:
                # This prefix was not seen during training, dead end.
                break
            next_char = random.choice(followers)
            if next_char == end_char:
                break
            word_chars.append(next_char)
            # Keep exactly prefix_len trailing characters. For a unigram
            # model (prefix_len == 0) the prefix must stay empty.
            if prefix_len:
                current_prefix = (current_prefix + next_char)[-prefix_len:]
            else:
                current_prefix = ""
        if min_len <= len(word_chars) <= max_len:
            return "".join(word_chars)
    return ""  # Return empty if we couldn't generate a valid word

slithyt/pronounce.py ADDED Viewed

@@ -0,0 +1,60 @@
+# slithyt/pronounce.py
def score_pronounceability(word: str) -> float:
    """
    Scores how pronounceable a word looks, using three spelling heuristics.

    Starting from 1.0, penalties are subtracted for the longest run of
    non-vowel characters beyond 3 (0.3 each), for the longest run of vowels
    beyond 2 (0.4 each), and a flat 0.3 when the share of vowels falls
    outside 35%-65%. Only "a", "e", "i", "o", "u" count as vowels; every
    other character counts as a consonant.

    Args:
        word: The word to score.

    Returns:
        A float between 0.0 (less pronounceable) and 1.0 (more pronounceable).
        An empty word scores 0.0.
    """
    if not word:
        return 0.0

    letters = word.lower()
    vowel_chars = "aeiou"

    # One pass tracks the longest run of each kind and the vowel count.
    longest_run = {True: 0, False: 0}
    run_is_vowel = None
    run_length = 0
    vowel_total = 0
    for ch in letters:
        is_vowel = ch in vowel_chars
        if is_vowel:
            vowel_total += 1
        if is_vowel == run_is_vowel:
            run_length += 1
        else:
            run_is_vowel = is_vowel
            run_length = 1
        if run_length > longest_run[is_vowel]:
            longest_run[is_vowel] = run_length

    # A cluster of more than 3 consonants is difficult.
    consonant_penalty = max(0, longest_run[False] - 3) * 0.3
    # A cluster of more than 2 vowels is uncommon.
    vowel_penalty = max(0, longest_run[True] - 2) * 0.4
    # The share of vowels should sit between 35% and 65%.
    vowel_ratio = vowel_total / len(letters)
    ratio_penalty = 0 if 0.35 <= vowel_ratio <= 0.65 else 0.3

    total_penalty = consonant_penalty + vowel_penalty + ratio_penalty
    return max(0.0, 1.0 - total_penalty)

slithyt/rhyme.py ADDED Viewed

@@ -0,0 +1,87 @@
+# src/slithyt/rhyme.py
+import pronouncing
+import pickle
+import random
+import pathlib
+from . import build
def get_phonetic_breakdown(word: str) -> "list[str] | None":
    """Gets the phonetic breakdown for a word.

    Args:
        word: The word to look up with ``pronouncing.phones_for_word``.

    Returns:
        The phonemes of the first listed pronunciation as a list of strings,
        or None when the lookup yields no pronunciation.
    """
    # The return annotation is a string so that it is not evaluated at
    # definition time; the ``X | None`` syntax needs Python 3.10+.
    pronunciations = pronouncing.phones_for_word(word)
    return pronunciations[0].split() if pronunciations else None
def get_rhyme_signature(phonemes: "list[str]") -> "list[str] | None":
    """Extracts the rhyming part of a word from its list of phonemes.

    The rhyming part starts at the last phoneme whose stress digit is 1 or 2
    and runs to the end of the word.

    Args:
        phonemes: The phonemes of a word, with a trailing stress digit on
            the vowels (e.g. "IH1").

    Returns:
        The phonemes from the last stressed vowel onwards, or None when no
        phoneme carries stress digit 1 or 2.
    """
    # Annotations are strings so they are not evaluated at definition time;
    # the ``X | None`` syntax needs Python 3.10+.
    # Scan from the end: the first hit is the last stressed vowel.
    # str.endswith also copes with an empty phoneme string.
    for index in range(len(phonemes) - 1, -1, -1):
        if phonemes[index].endswith(('1', '2')):
            return phonemes[index:]
    return None
def _load_or_build_model(model_path: str, label: str, builder) -> dict:
    """Return the pickled model at model_path, building and caching it when needed."""
    cache_path = pathlib.Path(model_path)
    if cache_path.exists():
        try:
            with open(cache_path, "rb") as f:
                # NOTE: pickle.load can run arbitrary code from the file it
                # reads, so this path must only point at a cache file written
                # by this tool.
                return pickle.load(f)
        except (pickle.UnpicklingError, EOFError):
            # A truncated or corrupt cache is rebuilt instead of crashing.
            print(f"WARNING: Cached {label} model at {cache_path} is unreadable. Rebuilding...")
    else:
        print(f"First-time setup: Building {label} model. This may take a moment...")

    default_dict_path = pathlib.Path(__file__).parent / 'data' / 'cmu.txt.gz'
    model = builder(str(default_dict_path))
    cache_path.parent.mkdir(parents=True, exist_ok=True)
    with open(cache_path, "wb") as f:
        pickle.dump(model, f)
    print(f"{label.capitalize()} model saved to {cache_path}")
    return model


def load_phonetic_model(model_path: str) -> dict:
    """Loads a pre-computed phonetic model, building it if it doesn't exist.

    Args:
        model_path: Path of the pickled model file.

    Returns:
        The model stored at model_path. When the file is missing or cannot
        be unpickled, the model is built from the bundled data/cmu.txt.gz
        with build.build_phonetic_model, written to model_path and returned.
    """
    # The lambda defers the lookup of the builder until a build is needed.
    return _load_or_build_model(
        model_path, "phonetic", lambda corpus: build.build_phonetic_model(corpus)
    )


def load_transcription_model(model_path: str) -> dict:
    """Loads a pre-computed transcription model, building it if it doesn't exist.

    Args:
        model_path: Path of the pickled model file.

    Returns:
        The model stored at model_path. When the file is missing or cannot
        be unpickled, the model is built from the bundled data/cmu.txt.gz
        with build.build_transcription_model, written to model_path and
        returned.
    """
    return _load_or_build_model(
        model_path, "transcription", lambda corpus: build.build_transcription_model(corpus)
    )
def generate_phonetic_word(model: dict, rhyme_signature: "list[str]", n: int = 3) -> "list[str] | None":
    """Generates a new sequence of phonemes that ends with the given rhyme signature.

    A random onset of at most 10 phonemes is drawn from the model, starting
    at the all-"^" prefix and stopping early at the "$" end marker. The
    rhyme signature is then appended to that onset.

    Args:
        model: Phonetic n-gram model mapping prefix tuples to follower lists.
        rhyme_signature: The phonemes every generated word must end with.
        n: The order of the n-gram model.

    Returns:
        The onset followed by the rhyme signature, or None when the model is
        empty or the walk reaches a prefix the model does not know.
    """
    # Annotations are strings so they are not evaluated at definition time;
    # the ``X | None`` syntax needs Python 3.10+.
    if not model:
        return None

    prefix_len = max(n - 1, 0)
    current_prefix = ("^",) * prefix_len
    onset = []
    for _ in range(10):
        followers = model.get(current_prefix)
        if not followers:
            return None
        next_phoneme = random.choice(followers)
        if next_phoneme == "$":
            break
        onset.append(next_phoneme)
        # Keep exactly prefix_len trailing phonemes. For a unigram model
        # (prefix_len == 0) the prefix must stay empty.
        if prefix_len:
            current_prefix = (current_prefix + (next_phoneme,))[-prefix_len:]
        else:
            current_prefix = ()
    return onset + list(rhyme_signature)
def transcribe_word(transcription_model: dict, phonemes: "list[str]") -> str:
    """Transcribes a sequence of phonemes into a plausible word spelling.

    Args:
        transcription_model: Maps a stress-free phoneme to a list of
            candidate spellings.
        phonemes: The phonemes to spell; trailing stress digits (0/1/2) are
            ignored.

    Returns:
        One randomly chosen spelling per phoneme, joined into a string. A
        phoneme with no candidate spelling is written as "?".
    """
    # The parameter annotation is a string so that it is not evaluated at
    # definition time; the subscripted builtin needs Python 3.9+.
    letters = []
    for phoneme in phonemes:
        spellings = transcription_model.get(phoneme.rstrip('012'))
        letters.append(random.choice(spellings) if spellings else '?')
    return "".join(letters)

slithyt/sentiment.py ADDED Viewed

@@ -0,0 +1,119 @@
+from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# The analyzer is instantiated only to obtain its word lexicon.
_analyzer = SentimentIntensityAnalyzer()

# --- Structured Morpheme Lexicons ---
# Scores below use the raw scale that _normalize_score() maps from
# [-4, 4] onto [0.0, 1.0].

# Affixes that flip the sentiment of a non-neutral stem instead of being
# averaged with it.
# NOTE(review): "il" and "ir" have no entry in _PREFIXES, and the prefix
# scan only iterates the _PREFIXES keys, so these two look unreachable.
_INVERTING_PREFIXES = {
    "un", "in", "im", "il", "ir", "non", "dis", "mis", "dys", "anti",
}
_INVERTING_SUFFIXES = {"less"}

_PREFIXES = {
    # negative
    "mal": -4.0, "mis": -3.0, "dis": -2.0,
    "un": -1.0, "in": -1.0, "im": -1.0, "non": -1.0, "de": -1.0,
    "anti": -2.0, "contra": -2.0, "ob": -2.0, "pseudo": -2.0, "cata": -2.0,
    "dys": -2.2, "caco": -2.3,
    # positive
    "bene": 3.0, "eu": 4.0, "pro": 2.0, "pre": 1.0,
    "con": 2.0, "com": 2.0, "sym": 2.0, "syn": 2.0,
}

_SUFFIXES = {
    # negative
    "less": -2.0, "cide": -4.0,
    # positive
    "ful": 1.5, "able": 1.0, "ible": 1.0,
}

# NOTE(review): the infix scan in analyze_word_sentiment() only considers
# substrings of three or more characters, so "am" looks unreachable.
_INFIXES = {
    # negative
    "mort": -3.0, "nec": -3.0, "necr": -3.0, "path": -3.0, "tox": -4.0,
    "pess": -3.0, "mor": -3.0, "vill": -3.0, "crim": -3.0, "rupt": -2.0,
    "fail": -3.0, "terr": -2.0, "horr": -4.0, "vuln": -2.0, "hostil": -3.0,
    "vex": -2.0, "trib": -2.0, "fall": -2.0, "err": -1.9,
    # positive
    "am": 3.0, "amic": 3.0, "phil": 3.0, "pac": 4.0, "grat": 4.0,
    "felic": 4.0, "beat": 4.0, "sanct": 3.0, "salv": 3.0, "ver": 3.0,
    "honor": 3.0, "dign": 3.0, "fortun": 2.0, "optim": 4.0, "lucr": 2.0,
    "prosper": 4.0, "brill": 3.0, "clar": 2.0, "lumin": 3.0, "vital": 3.0,
    "viv": 3.0, "gen": 2.0, "cresc": 2.0, "cret": 2.0, "magn": 3.0,
    "grand": 3.0, "nobl": 3.0, "excell": 4.0, "laud": 4.0, "glor": 3.0,
    "merit": 3.0, "secure": 3.0, "firm": 2.0, "resolut": 2.0, "joy": 4.0,
    "happ": 4.0, "hope": 3.0, "vit": 2.0, "equi": 1.5, "amor": 2.8,
    "bon": 2.5, "luc": 1.8, "lum": 1.8, "cred": 1.7,
}

# Whole-word scores, checked before any morpheme analysis.
_WORD_LEXICON = _analyzer.lexicon

# Longest affix first, so a longer affix wins over a shorter one it contains.
_SORTED_PREFIXES = sorted(_PREFIXES, key=len, reverse=True)
_SORTED_SUFFIXES = sorted(_SUFFIXES, key=len, reverse=True)
+def _normalize_score(score: float) -> float:
+    """Normalizes a VADER score to a 0.0-1.0 scale."""
+    return (score + 4) / 8
def _score_with_affix(affix_score: float, stem: str, inverts: bool) -> float:
    """Combines the raw score of a matched affix with the sentiment of its stem."""
    if len(stem) < 4:
        # The stem is too short to analyze; the affix decides alone.
        return _normalize_score(affix_score)
    stem_sentiment = analyze_word_sentiment(stem)
    if stem_sentiment == 0.5:
        # If the stem is neutral, the affix's sentiment dominates.
        return _normalize_score(affix_score)
    if inverts:
        return 1.0 - stem_sentiment
    # Average on the raw scale, then map back to 0.0..1.0.
    stem_raw = stem_sentiment * 8 - 4
    return _normalize_score((affix_score + stem_raw) / 2)


def analyze_word_sentiment(word: str) -> float:
    """
    Estimates the sentiment of a single word on a 0.0 to 1.0 scale.

    The lookup runs in this order and returns at the first step that applies:
    the whole-word lexicon, the longest matching prefix, the longest matching
    suffix, and finally a left-to-right scan for known infixes of three or
    more characters whose scores are averaged. Affix handling recurses on the
    remaining stem. An empty word, or one with no match at all, scores 0.5.
    """
    word_lower = word.lower()
    if not word_lower:
        return 0.5

    if word_lower in _WORD_LEXICON:
        return _normalize_score(_WORD_LEXICON[word_lower])

    for prefix in _SORTED_PREFIXES:
        if len(prefix) >= 2 and word_lower.startswith(prefix):
            return _score_with_affix(
                _PREFIXES[prefix],
                word_lower[len(prefix):],
                prefix in _INVERTING_PREFIXES,
            )

    for suffix in _SORTED_SUFFIXES:
        if len(suffix) >= 2 and word_lower.endswith(suffix):
            return _score_with_affix(
                _SUFFIXES[suffix],
                word_lower[:-len(suffix)],
                suffix in _INVERTING_SUFFIXES,
            )

    infix_scores = []
    total_len = len(word_lower)
    pos = 0
    while pos < total_len:
        # Try the longest substring first; stop before length drops below 3.
        for end in range(total_len, pos + 2, -1):
            piece = word_lower[pos:end]
            if piece in _INFIXES:
                infix_scores.append(_INFIXES[piece])
                pos = end
                break
        else:
            pos += 1

    if not infix_scores:
        return 0.5
    return _normalize_score(sum(infix_scores) / len(infix_scores))

slithyt/utils.py ADDED Viewed

@@ -0,0 +1,21 @@
+import gzip
def open_any(file_path: str):
    """
    Opens a plain-text or gzipped file for reading as UTF-8 text.

    The first two bytes of the file are compared with the gzip magic number
    to decide which opener to use.

    Args:
        file_path: The path to the file to open.

    Returns:
        A file handle ready for reading in text mode.
    """
    with open(file_path, 'rb') as probe:
        magic = probe.read(2)

    if magic != b'\x1f\x8b':
        return open(file_path, 'r', encoding="utf-8")
    return gzip.open(file_path, 'rt', encoding="utf-8")

slithyt/validator.py ADDED Viewed

@@ -0,0 +1,58 @@
+import re
+from typing import Set
+from . import sentiment
+from . import pronounce
+from . import utils
def load_word_set(file_path: str) -> Set[str]:
    """
    Reads a word list (plain text or gzipped, one word per line) into a set.

    Each line is stripped and lower-cased; blank lines are ignored. An empty
    file_path yields an empty set. A missing file prints a warning and also
    yields an empty set.
    """
    if not file_path:
        return set()
    try:
        with utils.open_any(file_path) as handle:
            stripped_lines = (line.strip() for line in handle)
            return {entry.lower() for entry in stripped_lines if entry}
    except FileNotFoundError:
        print(f"WARNING: File not found at {file_path}. Skipping this check.")
        return set()
def validate_word(
    word: str,
    matches_regex: str = None,
    reject_regex: str = None,
    dictionary_set: Set[str] = None,
    blocklist_set: Set[str] = None,
    corpus_rejection_set: Set[str] = None,
    min_sentiment: float = None,
    max_sentiment: float = None,
    min_pronounceability: float = None
) -> bool:
    """
    Validates a word against a set of constraints.

    Every constraint that is None (or an empty set) is skipped. Checks run in
    the order of the parameters and the first failure ends validation.

    Args:
        word: The candidate word. An empty word is always invalid.
        matches_regex: Pattern the word must match (re.search, case-insensitive).
        reject_regex: Pattern the word must not match (re.search, case-insensitive).
        dictionary_set: Words to reject; compared with the lower-cased word.
        blocklist_set: Words to reject; compared with the lower-cased word.
        corpus_rejection_set: Words to reject; compared with the lower-cased word.
        min_sentiment: Lowest accepted sentiment score (inclusive).
        max_sentiment: Highest accepted sentiment score (inclusive).
        min_pronounceability: Lowest accepted pronounceability score (inclusive).

    Returns:
        True if the word passes every active constraint, otherwise False.
    """
    # The set parameters use typing.Set (already imported by this module):
    # subscripting the builtin ``set`` in an annotation needs Python 3.9+.
    if not word:
        return False
    word_lower = word.lower()

    if matches_regex and not re.search(matches_regex, word, re.IGNORECASE):
        return False
    if reject_regex and re.search(reject_regex, word, re.IGNORECASE):
        return False

    for rejected_words in (dictionary_set, blocklist_set, corpus_rejection_set):
        if rejected_words and word_lower in rejected_words:
            return False

    if min_sentiment is not None or max_sentiment is not None:
        score = sentiment.analyze_word_sentiment(word)
        if min_sentiment is not None and score < min_sentiment:
            return False
        if max_sentiment is not None and score > max_sentiment:
            return False

    if min_pronounceability is not None:
        if pronounce.score_pronounceability(word) < min_pronounceability:
            return False
    return True

slithyt-1.0.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,103 @@
+Metadata-Version: 2.4
+Name: slithyt
+Version: 1.0.0
+Summary: A tool for generating novel, pronounceable words based on linguistic corpuses.
+Author-email: Daniel Hardman <daniel.hardman@gmail.com>
+License: MIT License
+Project-URL: Homepage, https://github.com/dhh1128/slithyt
+Project-URL: Bug Tracker, https://github.com/dhh1128/slithyt/issues
+Keywords: word generation,procedural generation,nlp,linguistics,naming
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Topic :: Text Processing :: Linguistic
+Classifier: Development Status :: 4 - Beta
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: pronouncing
+Requires-Dist: vaderSentiment
+Dynamic: license-file
+# SlithyT
+A tool for generating novel, plausible, and pronounceable words based on linguistic corpuses.
+The name is a reference to the "slithy toves" in Lewis Carroll's poem "Jabberwocky".
+(Code was written substantially by AI, although I did a fair amount of reviewing, criticizing, revising
+and debugging.)
+## Installation
+```bash
+pip install .
+```
+## Usage
+Generate a word that looks/sounds like it fits with other words in a given
+corpus. Similarity is determined partly by ngram analysis and partly by
+pronunciation.
+You can make your own corpus, or use pregenerated ones (in the data folder
+of the package):
+* Astronomy names (stars, galaxies, planets)
+* Transliterated Greek, Latin, Hebrew, Egyptian names
+* Harry Potter or Star Wars names
+* Drug names
+* Latin words from biology taxonomy (genus, species)
+You can also use the whole dictionary as your corpus, in which case you will
+get words with no particular flavor to them. A good corpus has at least a
+couple hundred words in it.
+By default, generated words are *novel*, meaning they won't appear in the
+corpus you reference. You can also add a blocklist to avoid generating curse
+words, words that violate trademarks or spam filters, etc.
+All corpora and dictionary/block list files used by this tool are text
+files having a single word per line, and can optionally be gzipped.
+Sentiment analysis, pronounceability, and rhyming are moderately English-
+centric, though they tolerate Romance and Germanic languages a bit as well.
+However, they could be made to reflect the sensibilities of other language
+communities by running `slithyt build-cache --corpus path/to/corpus.txt`
+(or build_phonetic_model.py and build_transcription_model.py in the
+package's scripts folder). These generate cached patterns in
+~/.slithyt/data.
+```bash
+# Generate 10 realistic words that sound like they belong in corpus. Make
+# the words have a length of at least 5 characters.
+slithyt generate --corpus path/to/your/corpus.txt
+# Generate words that have a positive connotation due to sound symbolism
+# (see https://en.wikipedia.org/wiki/Sound_symbolism), and that use n=4
+# for ngram analysis. (The --ngram-size argument is a tradeoff. Default is 3.
+# Bigger values make the resonance with the corpus stronger, but also make it
+# harder to be creative; it may be impossible to generate words if you go too
+# high. Smaller values give the algorithm more freedom in both size and
+# character sequence, but the output might sound less like the corpus.)
+slithyt generate --corpus path/to/corpus.txt --min-sentiment 0.8 --ngram-size 4
+# Generate words that are between 4 and 8 characters long, and that are at
+# least moderately pronounceable. (Pronounceability depends partly on the
+# speaker's judgment; slithyt uses a simple algorithm to predict scores from
+# 0 (hardest) to 1 (easiest), but the corpus may affect how reasonable 0.5 is.
+# Typically, the variety of generated word lengths matches the variety of
+# word lengths in the corpus. These values constrain output but may make
+# generation impossible, if nothing in the corpus is as small or as large as
+# what was requested.)
+slithyt generate --corpus path/to/corpus.txt --min-len 4 --max-len 8 --min-pronounceability 0.5
+# Generate 5 words that rhyme with synergy
+slithyt generate --count 5 --rhymes-with synergy
+# Report the rhyming analysis for synergy. (Only known words are usable
+# as a rhyming template; passing made-up words here will do nothing
+# useful.)
+slithyt rhyme synergy
+# Check to see whether a particular made-up word would pass certain tests.
+slithyt validate synerjee
+```

slithyt-1.0.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,16 @@
+slithyt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+slithyt/build.py,sha256=kY33U9J-9VsqE7Yk7GmGU1suz4djF-WR5MCdpmb8rTg,2218
+slithyt/cli.py,sha256=Iti7pwMBmme5rjX1I-8L5sCtbOZER3yjYAEYWhQa9h4,7780
+slithyt/generator.py,sha256=a6rIySznbYE-CNU8SpRbEusPFQsKfWHE04J1kQRp6mU,3123
+slithyt/pronounce.py,sha256=WJrnBqQhj23eg-ab7X2O18Pzkt8nWF9wgGCxVe4qj9A,1995
+slithyt/rhyme.py,sha256=Ka-bbU_F6FZ9U_X6Blz_hCZE0f3bXRhFfFJ6HGwSHs8,3468
+slithyt/sentiment.py,sha256=JRbJ2Etp9cUNw5h4BoN7XxiYT7dGE6eDhaqhixdvjY4,4228
+slithyt/utils.py,sha256=_NrSALdA-tnnCmMXA3m60_Jpqf-lRRwku92LJ0zt414,595
+slithyt/validator.py,sha256=eglvR9nKCh9oIylzgBc4Y2P5OheJzLtwdi-RqEF0RXQ,1922
+slithyt/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+slithyt-1.0.0.dist-info/licenses/LICENSE,sha256=Hr7Rdl74t83L74WCKMfbp7vkpIdkrqaJ3uUjpnnse4w,1057
+slithyt-1.0.0.dist-info/METADATA,sha256=pdRjy34I6EB9OOuyxPzbEPkfAoePp1jGDnZgN2BrfqU,4381
+slithyt-1.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+slithyt-1.0.0.dist-info/entry_points.txt,sha256=EPXqPZYpJ7a1DV3x3AfJLi8uniQjoPMsHy5dQOoGNOE,45
+slithyt-1.0.0.dist-info/top_level.txt,sha256=0SZUC3JVinOynykEdmR_YnhhBIcBRciEA7kkcTWjRms,8
+slithyt-1.0.0.dist-info/RECORD,,

slithyt-1.0.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: setuptools (80.9.0)
+Root-Is-Purelib: true
+Tag: py3-none-any

slithyt-1.0.0.dist-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console_scripts]
2	+ slithyt = slithyt.cli:main

slithyt-1.0.0.dist-info/licenses/LICENSE ADDED Viewed

@@ -0,0 +1,7 @@
+Copyright (c) 2025 Daniel Hardman
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

slithyt-1.0.0.dist-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ slithyt