PyPI - conlang - Versions diffs - 0.1__py3-none-any.whl - Mend

conlang 0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

conlang/__init__.py +9 -0
conlang/language.py +87 -0
conlang/language_config.py +135 -0
conlang/phonemes.py +53 -0
conlang/presets.py +207 -0
conlang/rules.py +192 -0
conlang/sound_change.py +261 -0
conlang/swadesh.py +209 -0
conlang/utils.py +131 -0
conlang/vocabulary.py +200 -0
conlang-0.1.dist-info/LICENSE +674 -0
conlang-0.1.dist-info/METADATA +53 -0
conlang-0.1.dist-info/RECORD +15 -0
conlang-0.1.dist-info/WHEEL +5 -0
conlang-0.1.dist-info/top_level.txt +1 -0

conlang/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+from .language import Language
+from .language_config import LanguageConfig
+from .swadesh import SWADESH
+from .vocabulary import Vocabulary
+from .utils import split_syllables, is_acceptable
+from .sound_change import SoundChange, SoundChangePipeline
+# Public API of the package: exactly the names imported above are re-exported.
+__all__ = ['Language', 'LanguageConfig', 'SWADESH', 'Vocabulary',
+           'split_syllables', 'is_acceptable', 'SoundChange', 'SoundChangePipeline']

conlang/language.py ADDED Viewed

@@ -0,0 +1,87 @@
+import numpy as np
+import warnings
+from typing import List, Optional
+from .swadesh import SWADESH
+from .vocabulary import Vocabulary
+from .language_config import LanguageConfig
+from .utils import split_syllables, is_acceptable
+# Upper bound on how many candidate words Language.generate_vocabulary draws
+# for a single gloss before it gives up and emits a warning.
+MAX_ATTEMPTS = 10
+class Language:
+    """
+    Represents a language, including its configuration and vocabulary.
+    Attributes:
+        name (str): The name of the language.
+        config (LanguageConfig): The configuration for phonemes, patterns, and stress.
+        vocabulary (Vocabulary): The generated vocabulary for the language.
+    """
+    def __init__(self, name: str, config: LanguageConfig, vocabulary: Optional[Vocabulary] = None):
+        self.name = name
+        self.config = config
+        self.vocabulary = vocabulary or Vocabulary()
+    def generate_word(self, rank: int = -1) -> str:
+        """
+        Generates a word based on the language's configuration and word frequency rank.
+        Args:
+            rank (int): The rank of the word for frequency purposes. Defaults to -1.
+        Returns:
+            str: The generated word.
+        """
+        # Select a pattern based on rank (common words have simpler patterns)
+        patterns = self.config.patterns[:2] if 0 <= rank < 25 else self.config.patterns
+        pattern = np.random.choice(patterns)
+        word = ''.join(np.random.choice(self.config.phonemes[k]) for k in pattern)
+        syllables = split_syllables(word)
+        stressed_index = max(np.random.choice(self.config.stress), -len(syllables))
+        syllables[stressed_index] = "ˈ" + syllables[stressed_index]
+        return ''.join(syllables)
+    def generate_vocabulary(self, glosses: Optional[List[str]] = None):
+        """
+        Generates a vocabulary for the language based on glosses.
+        Args:
+            glosses (List[str], optional): A list of glosses to use for the vocabulary.
+                                           Defaults to the SWADESH list.
+        """
+        self.vocabulary = Vocabulary()
+        glosses = glosses or SWADESH
+        for gloss in glosses:
+            rank = SWADESH.index(gloss) if gloss in SWADESH else -1
+            attempts = 0
+            while attempts < MAX_ATTEMPTS:
+                word = self.generate_word(rank=rank)
+                if is_acceptable(word) and not self.vocabulary.has_word(word):
+                    break
+                attempts += 1
+            self.vocabulary.add_item(word, gloss)
+            if attempts == MAX_ATTEMPTS:
+                warnings.warn(f"Failed to generate unique acceptable word for '{gloss}'. Please, check your configuration.")
+    def __str__(self) -> str:
+        """
+        Returns a string representation of the language.
+        """
+        return f"{self.name}\n\n{self.config}\n\n{self.vocabulary}"
+    def __repr__(self):
+        """
+        Returns a string representation of the language.
+        """
+        return self.__str__()

conlang/language_config.py ADDED Viewed

@@ -0,0 +1,135 @@
+import json
+import numpy as np
+from pathlib import Path
+from typing import Dict, List
+from .presets import PRESETS
+class LanguageConfig:
+    """
+    Represents the configuration of a language, including its phonemes, patterns, and stress rules.
+    Attributes:
+        phonemes (Dict[str, List[str]]): A dictionary mapping categories to phoneme lists.
+        patterns (List[str]): A list of word patterns.
+        stress (List[int]): A list of stress positions.
+    """
+    def __init__(self, phonemes: Dict[str, List[str]], patterns: List[str], stress: List[int]):
+        self.phonemes = phonemes
+        self.patterns = patterns
+        self.stress = stress
+    @staticmethod
+    def from_str(config_str: str) -> 'LanguageConfig':
+        """
+        Parses a configuration string to create a LanguageConfig instance.
+        Args:
+            config_str (str): The configuration as a multi-line string.
+        Returns:
+            LanguageConfig: The parsed language configuration.
+        """
+        phonemes = {}
+        patterns = []
+        stress = []
+        for line in config_str.splitlines():
+            line = line.strip()
+            if not line:
+                continue
+            if ':' in line:
+                key, values = line.split(':')
+                phonemes[key.strip()] = values.strip().split()
+            elif line.replace('-', '').replace(' ', '').isdigit():
+                stress.extend(map(int, line.split()))
+            elif line.isupper():
+                patterns.extend(line.split())
+            else:
+                raise ValueError(f'Invalid line in configuration: {line}')
+        return LanguageConfig(phonemes, patterns, stress)
+    @staticmethod
+    def from_txt(file_path: str) -> 'LanguageConfig':
+        """
+        Reads a configuration from a text file to create a LanguageConfig instance.
+        Args:
+            file_path (str): The path to the configuration file.
+        Returns:
+            LanguageConfig: The parsed language configuration.
+        """
+        path = Path(file_path)
+        if not path.is_file():
+            raise FileNotFoundError(f'File not found: {file_path}')
+        with path.open('r', encoding='utf-8') as f:
+            return LanguageConfig.from_str(f.read())
+    @staticmethod
+    def from_dict(config_dict: Dict) -> 'LanguageConfig':
+        """
+        Creates a LanguageConfig instance from a dictionary.
+        Args:
+            config_dict (Dict): A dictionary containing the configuration.
+        Returns:
+            LanguageConfig: The parsed language configuration.
+        """
+        return LanguageConfig(
+            phonemes=config_dict['phonemes'],
+            patterns=config_dict['patterns'],
+            stress=config_dict['stress']
+        )
+    @staticmethod
+    def from_json(file_path: str) -> 'LanguageConfig':
+        """
+        Reads a configuration from a JSON file to create a LanguageConfig instance.
+        Args:
+            file_path (str): The path to the configuration file.
+        Returns:
+            LanguageConfig: The parsed language configuration.
+        """
+        path = Path(file_path)
+        if not path.is_file():
+            raise FileNotFoundError(f'File not found: {file_path}')
+        with path.open('r', encoding='utf-8') as f:
+            return LanguageConfig.from_dict(json.load(f))
+    @staticmethod
+    def random() -> 'LanguageConfig':
+        """
+        Generates a random LanguageConfig instance using predefined presets.
+        Returns:
+            LanguageConfig: A randomly selected language configuration.
+        """
+        preset_key = np.random.choice(list(PRESETS))
+        preset = PRESETS[preset_key]
+        return LanguageConfig(
+            phonemes=preset['phonemes'],
+            patterns=preset['patterns'],
+            stress=preset['stress']
+        )
+    def __str__(self) -> str:
+        """
+        Returns a string representation of the configuration.
+        """
+        phonemes = '\n'.join(f'{k}: {" ".join(v)}' for k, v in self.phonemes.items())
+        patterns = ' '.join(self.patterns)
+        stress = ' '.join(map(str, self.stress))
+        return f'{phonemes}\n{patterns}\n{stress}'
+    def __repr__(self) -> str:
+        """
+        Returns a string representation of the configuration.
+        """
+        return self.__str__()

conlang/phonemes.py ADDED Viewed

@@ -0,0 +1,53 @@
+# Phoneme inventories written as IPA strings. Every list below is a plain list
+# of str; the derived lists are built by appending a modifier character to
+# each entry of the lists they are derived from.
+# Base consonants: Stops, nasals, trills, flaps, fricatives, approximants, and laterals
+BASE_CONSONANTS = [
+    'p', 'b', 't', 'd', 'ʈ', 'ɖ', 'c', 'ɟ', 'k', 'g', 'q', 'ɢ', 'ʔ',    # stops
+    'm', 'ɱ', 'n', 'ɳ', 'ɲ', 'ŋ', 'ɴ',                                  # nasals
+    'ʙ', 'r', 'ʀ',                                                      # trills
+    'ⱱ', 'ɾ', 'ɽ',                                                      # taps/flaps
+    'ɸ', 'β', 'f', 'v', 'θ', 'ð', 's', 'z', 'ʃ', 'ʒ', 'ʂ', 'ʐ',         # fricatives
+    'ç', 'ʝ', 'x', 'ɣ', 'χ', 'ʁ', 'ħ', 'ʕ', 'h', 'ɦ',
+    'ɬ', 'ɮ',                                                           # lateral fricatives
+    'ʋ', 'ɹ', 'ɻ', 'j', 'ɰ',                                            # approximants
+    'l', 'ɭ', 'ʎ', 'ʟ',                                                 # laterals
+    'w'                                                                 # semivowels
+]
+# Affricates: Stops combined with fricatives
+AFFRICATES = ['ts', 'dz', 'tʃ', 'dʒ', 'ʈʂ', 'ɖʐ', 'tɕ', 'dʑ', 'tɬ', 'dɮ']
+# Modifiers: each list holds every base consonant and affricate followed by
+# one modifier character (ʰ aspirated, ʼ ejective, ʷ labialized). The lists are
+# generated mechanically, so they contain every combination, not a curated set.
+ASPIRATED = [f'{c}ʰ' for c in BASE_CONSONANTS + AFFRICATES]
+EJECTIVES = [f'{c}ʼ' for c in BASE_CONSONANTS + AFFRICATES]
+LABIALIZED = [f'{c}ʷ' for c in BASE_CONSONANTS + AFFRICATES]
+# Combined consonants
+# NOTE(review): LABIALIZED is defined above but is not part of CONSONANTS (and
+# therefore not part of PHONEMES), although several presets use labialized
+# consonants such as 'kʷ' - confirm whether the omission is intentional.
+CONSONANTS = BASE_CONSONANTS + AFFRICATES + ASPIRATED + EJECTIVES
+# Base vowels: High, mid, and low, including rounded and unrounded variants
+BASE_VOWELS = [
+    'i', 'y', 'ɨ', 'ʉ', 'ɯ', 'u',   # high
+    'ɪ', 'ʏ', 'ʊ',                  # near-high
+    'e', 'ø', 'ɘ', 'ɵ', 'ɤ', 'o',   # mid
+    'ə',                            # mid-central
+    'ɛ', 'œ', 'ɜ', 'ɞ', 'ʌ', 'ɔ',   # open-mid
+    'æ', 'ɐ',                       # near-open
+    'a', 'ɶ', 'ä', 'ɑ', 'ɒ'         # open
+]
+# Long vowels: every base vowel followed by the length mark ː
+LONG_VOWELS = [f'{v}ː' for v in BASE_VOWELS]
+# Combined vowels
+VOWELS = BASE_VOWELS + LONG_VOWELS
+# All phonemes, plus "ˈ", the mark that Language.generate_word puts in front of
+# the stressed syllable
+PHONEMES = CONSONANTS + VOWELS + ["ˈ"]
+# Common phonemes: A subset of frequently used phonemes
+COMMON_PHONEMES = [
+    'p', 't', 'k', 'm', 'n',
+    'b', 'd', 'g',
+    's', 'z',
+    'l', 'r',
+    'i', 'u', 'e', 'o', 'a'
+]

conlang/presets.py ADDED Viewed

@@ -0,0 +1,207 @@
+# Ready-made language configurations, keyed by preset name. LanguageConfig.random
+# picks one entry at random. Every preset has three keys:
+#   'phonemes': dict mapping a single-letter category to a list of IPA strings
+#               (an entry may be a cluster of several sounds, e.g. 'rm').
+#   'patterns': list of word shapes; each character of a pattern names one of
+#               the categories in 'phonemes'.
+#   'stress':   list of syllable indices that may carry the stress; the
+#               negative values used here count from the end of the word
+#               (-1 is the last syllable, -2 the one before it).
+PRESETS = {
+    'polynesian': {
+        'phonemes': {
+            'C': ['m', 'n', 'ŋ',
+                  'p', 't', 'k',
+                  'h',
+                  'r'],
+            'V': ['a', 'e', 'i', 'o', 'u']
+        },
+        'patterns': ['CVV', 'CVCV', 'VCV', 'VCVV'],
+        'stress': [-2]
+    },
+    'semitic': {
+        'phonemes': {
+            'C': ['m', 'n',
+                  't', 'k', 'q', 'ʔ',
+                  'b', 'd', 'g',
+                  'f', 's', 'ʃ', 'χ', 'h', 'ħ',
+                  'z', 'ʕ',
+                  'r', 'l',
+                  'j', 'w'],
+            'V': ['a', 'i', 'u'],
+            'L': ['aː', 'iː', 'uː']
+        },
+        'patterns': ['CVC', 'CLC', 'CVCV', 'CLCV', 'CVCVC', 'CLCVC'],
+        'stress': [-2]
+    },
+    'sinitic': {
+        'phonemes': {
+            'C': ['m', 'n', 'ɲ', 'ŋ',
+                  'p', 't', 'ts', 'tʃ', 'k', 'ʔ',
+                  'pʰ', 'tʰ', 'tsʰ', 'tʃʰ', 'kʰ',
+                  'b', 'd', 'dz', 'dʒ', 'g',
+                  's', 'ʃ', 'x',
+                  'z', 'ʒ', 'ɣ',
+                  'l'],
+            'V': ['a', 'e', 'i', 'o', 'u'],
+            'G': ['j', 'w'],
+            'F': ['m', 'n', 'ŋ',
+                  'p', 't', 'k',
+                  'j', 'w']
+        },
+        'patterns': ['CV', 'CGV', 'CVF', 'CGVF'],
+        'stress': [-1]
+    },
+    'amazonian':
+    {
+        'phonemes': {
+            'C': ['m', 'n', 'ɲ',
+                  'p', 't', 'k', 'ʔ',
+                  'ʃ', 'h',
+                  'r',
+                  'j', 'w'],
+            'V': ['a', 'e', 'i', 'o', 'u',
+                  'ɛ', 'ɔ', 'ɯ']
+        },
+        'patterns': ['CV', 'VCV', 'CVCV'],
+        'stress': [-1]
+    },
+    'andean': {
+        'phonemes': {
+            'C': ['m', 'n', 'ɲ',
+                  'p', 't', 'tʃ', 'k', 'q',
+                  's', 'h',
+                  'r', 'l', 'ʎ',
+                  'j', 'w'],
+            'V': ['a', 'i', 'u'],
+            'Q': ['rm', 'rp', 'rk', 'rq',
+                  'sp', 'sk', 'sq', 'sm',
+                  'kp', 'kt', 'ks',
+                  'qp', 'qt', 'qs'],
+            'F': ['n', 'k', 's', 'r']
+        },
+        'patterns': ['VCV', 'CVCV', 'VQV', 'CVQV', 'VCVF', 'CVCVF', 'VQVF', 'CVQVF'],
+        'stress': [-2]
+    },
+    'nilotic': {
+        'phonemes': {
+            'C': ['m', 'n', 'ŋ', 'ɲ',
+                  'p', 't', 'c', 'k',
+                  'b', 'd', 'ɟ', 'g',
+                  's',
+                  'r', 'l',
+                  'j', 'w'],
+            'G': ['j', 'w'],
+            'V': ['a', 'e', 'i', 'o', 'u',
+                  'ɛ', 'ɔ', 'ʌ']
+        },
+        'patterns': ['CVC', 'CGVC'],
+        'stress': [-1]
+    },
+    'pacific_coast': {
+        'phonemes': {
+            'C': ['m', 'n',
+                  't', 'ts', 'tɬ', 'k', 'kʷ', 'q', 'qʷ', 'ʔ',
+                  'tʼ', 'tsʼ', 'tɬʼ', 'kʼ', 'kʷʼ', 'qʼ', 'qʷʼ',
+                  's', 'ɬ', 'x', 'xʷ', 'χ', 'χʷ', 'h',
+                  'l',
+                  'j', 'w'],
+            'V': ['a', 'e', 'i', 'u',
+                  'ə',
+                  'aː', 'eː', 'iː', 'uː']
+        },
+        'patterns': ['CVC'],
+        'stress': [-1]
+    },
+    'uralic': {
+        'phonemes': {
+            'C': ['m', 'n', 'ɲ', 'ŋ', 'p', 't', 'tɕ', 'tʃ', 'k', 's', 'ɕ', 'ʃ',
+                  'r', 'l', 'ʎ', 'j', 'w'],
+            'Q': ['pt', 'ps', 'tk', 'tɕk', 'tʃk', 'kt', 'ktɕ', 'ktʃ', 'ks',
+                  'mp', 'mt', 'mk', 'nt', 'ŋk', 'lk', 'lm', 'lw', 'rk', 'rm',
+                  'rw'],
+            'V': ['a', 'e', 'i', 'o', 'u', 'y', 'ɛ'],
+            'F': ['a', 'i']
+        },
+        'patterns': ['VCF', 'CVCF'],
+        'stress': [-2]
+    },
+    'germanic': {
+        'phonemes': {
+            'C': ['m', 'n',
+                  'p', 't', 'k',
+                  'b', 'd',
+                  'f', 'θ', 's', 'h',
+                  'z',
+                  'r', 'l',
+                  'j', 'w'],
+            'Q': ['pl', 'kl', 'bl', 'fl', 'sl',
+                  'pr', 'tr', 'kr', 'br', 'dr', 'fr', 'θr',
+                  'tw', 'kw', 'dw', 'θw', 'sw', 'hw',
+                  'kn', 'sm', 'sn', 'sp', 'st', 'sk'],
+            'F': ['mp', 'nt', 'nk', 'ns',
+                  'zd',
+                  'rt', 'rk', 'rs'],
+            'V': ['a', 'e', 'i', 'u'],
+            'L': ['aː', 'eː', 'iː', 'uː', 'ɔː'],
+            'D': ['aw', 'aj','ew', 'iw']
+        },
+        'patterns': ['CVC', 'QVC', 'CVF',
+                     'CLC', 'QLC',
+                     'CDC', 'QDC',
+                     'VC', 'VF', 'DC',
+                     'LC'],
+        'stress': [-2]
+    },
+    'caucasus': {
+        'phonemes': {
+            'C': ['m', 'n',
+                  'pʼ', 'tʼ', 'tsʼ', 'tʃʼ', 'kʼ', 'qʼ',
+                  'b', 'd', 'dz', 'dʒ', 'g', 'gʷ',
+                  's', 'ʃ', 'χ', 'χʷ', 'ħ', 'ħʷ',
+                  'z', 'ʒ', 'ʁ', 'ʁʷ',
+                  'r', 'l'],
+            'V': ['a', 'ə'],
+        },
+        'patterns': ['CV', 'VC', 'CVC', 'VCV', 'CVCV'],
+        'stress': [-2]
+    },
+    'bantu': {
+        'phonemes': {
+            'C': ['m', 'n', 'ɲ',
+                  'p', 't', 'tʃ', 'k',
+                  'b', 'd', 'dʒ', 'g'],
+            'Q': ['mp', 'mb', 'nt', 'nd', 'ŋk', 'ŋg', 'ntʃ', 'ndʒ'],
+            'V': ['a', 'e', 'i', 'o', 'u']
+        },
+        'patterns': ['CV', 'QV', 'VCV', 'VQV',
+                     'CVCV', 'CVQV', 'QVCV'],
+        'stress': [-1, -2]
+    },
+    'maya': {
+        'phonemes': {
+            'C': ['m', 'n',
+                  'p', 't', 'ts', 'tʃ', 'k', 'ʔ',
+                  'b',
+                  'pʼ', 'tʼ', 'tsʼ', 'tʃʼ', 'kʼ',
+                  's', 'ʃ', 'χ', 'h',
+                  'l',
+                  'j', 'w'],
+            'F': ['m', 'n',
+                  'ts', 'tʃ', 'k', 'ʔ',
+                  'b',
+                  'tsʼ', 'tʃʼ', 'kʼ',
+                  'ʃ', 'h',
+                  'l'],
+            'V': ['a', 'e', 'i', 'o', 'u',
+                  'aː', 'eː', 'iː', 'oː', 'uː']
+        },
+        'patterns': ['CVF'],
+        'stress': [-1]
+    },
+    'caddoan': {
+        'phonemes': {
+            'C': ['n',
+                  'p', 't', 'tʃ', 'k', 'ʔ',
+                  's', 'ʃ', 'x', 'h',
+                  'r',
+                  'w'],
+            'F': ['t', 'k', 'ʔ'],
+            'V': ['a', 'e', 'i', 'o', 'u',
+                  'aː', 'eː', 'iː', 'oː', 'uː']
+        },
+        'patterns': ['CVCV', 'CVCVF'],
+        'stress': [-1, -2]
+    }
+}