conlang 0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
conlang/__init__.py ADDED
@@ -0,0 +1,9 @@
1
+ from .language import Language
2
+ from .language_config import LanguageConfig
3
+ from .swadesh import SWADESH
4
+ from .vocabulary import Vocabulary
5
+ from .utils import split_syllables, is_acceptable
6
+ from .sound_change import SoundChange, SoundChangePipeline
7
+
8
# Public API of the package: names re-exported by ``from conlang import *``.
__all__ = [
    'Language',
    'LanguageConfig',
    'SWADESH',
    'Vocabulary',
    'split_syllables',
    'is_acceptable',
    'SoundChange',
    'SoundChangePipeline',
]
conlang/language.py ADDED
@@ -0,0 +1,87 @@
1
+ import numpy as np
2
+ import warnings
3
+
4
+ from typing import List, Optional
5
+ from .swadesh import SWADESH
6
+ from .vocabulary import Vocabulary
7
+ from .language_config import LanguageConfig
8
+ from .utils import split_syllables, is_acceptable
9
+
10
+
11
# Maximum number of candidate words generated per gloss before giving up
# on finding one that is both acceptable and not yet in the vocabulary.
MAX_ATTEMPTS = 10


class Language:
    """
    Represents a language, including its configuration and vocabulary.

    Attributes:
        name (str): The name of the language.
        config (LanguageConfig): The configuration for phonemes, patterns, and stress.
        vocabulary (Vocabulary): The generated vocabulary for the language.
    """

    def __init__(self, name: str, config: LanguageConfig, vocabulary: Optional[Vocabulary] = None):
        """
        Creates a language.

        Args:
            name (str): The name of the language.
            config (LanguageConfig): The phoneme/pattern/stress configuration.
            vocabulary (Vocabulary, optional): An existing vocabulary to use.
                A fresh, empty Vocabulary is created when omitted.
        """
        self.name = name
        self.config = config
        # Test against None explicitly: a caller-supplied vocabulary that
        # happens to be falsy (e.g. empty) must be kept, not replaced.
        self.vocabulary = Vocabulary() if vocabulary is None else vocabulary

    def generate_word(self, rank: int = -1) -> str:
        """
        Generates a word based on the language's configuration and word frequency rank.

        A pattern is drawn at random, each pattern character is replaced by a
        random phoneme of that category, and one syllable is marked with the
        stress sign "ˈ".

        Args:
            rank (int): The rank of the word for frequency purposes. Ranks
                0..24 draw only from the first two configured patterns; any
                other value (including the default -1) draws from all of them.

        Returns:
            str: The generated word.
        """
        # Select a pattern based on rank (common words have simpler patterns)
        patterns = self.config.patterns[:2] if 0 <= rank < 25 else self.config.patterns
        pattern = np.random.choice(patterns)

        word = ''.join(np.random.choice(self.config.phonemes[k]) for k in pattern)

        syllables = split_syllables(word)
        if not syllables:
            # Nothing to put the stress mark on; return the word unmarked
            # instead of failing with an IndexError.
            return word

        # Clamp the configured stress position into the valid index range on
        # both sides, so short words work with any configured position
        # (negative positions count from the end of the word).
        stressed_index = int(np.random.choice(self.config.stress))
        stressed_index = min(max(stressed_index, -len(syllables)), len(syllables) - 1)
        syllables[stressed_index] = "ˈ" + syllables[stressed_index]

        return ''.join(syllables)

    def generate_vocabulary(self, glosses: Optional[List[str]] = None):
        """
        Generates a vocabulary for the language based on glosses.

        Any previously stored vocabulary is replaced. For every gloss up to
        MAX_ATTEMPTS candidate words are generated; the first one that is
        acceptable and not yet in the vocabulary is stored. If no candidate
        qualifies, the last candidate is stored anyway and a warning is issued.

        Args:
            glosses (List[str], optional): A list of glosses to use for the vocabulary.
                Defaults to the SWADESH list.
        """
        self.vocabulary = Vocabulary()

        glosses = glosses or SWADESH

        # Build the gloss -> rank lookup once instead of scanning SWADESH
        # twice per gloss. setdefault keeps the first occurrence, matching
        # what SWADESH.index() would return.
        swadesh_rank = {}
        for position, entry in enumerate(SWADESH):
            swadesh_rank.setdefault(entry, position)

        for gloss in glosses:
            rank = swadesh_rank.get(gloss, -1)
            found = False

            for _ in range(MAX_ATTEMPTS):
                word = self.generate_word(rank=rank)
                if is_acceptable(word) and not self.vocabulary.has_word(word):
                    found = True
                    break

            # Best effort: the last candidate is stored even when no attempt
            # satisfied the checks, so every gloss gets an entry.
            self.vocabulary.add_item(word, gloss)

            if not found:
                warnings.warn(f"Failed to generate unique acceptable word for '{gloss}'. Please, check your configuration.")

    def __str__(self) -> str:
        """
        Returns a string representation of the language: its name,
        configuration and vocabulary, separated by blank lines.
        """
        return f"{self.name}\n\n{self.config}\n\n{self.vocabulary}"

    def __repr__(self):
        """
        Returns the same string representation as ``__str__``.
        """
        return self.__str__()
conlang/language_config.py ADDED
@@ -0,0 +1,135 @@
1
+ import json
2
+ import numpy as np
3
+
4
+ from pathlib import Path
5
+ from typing import Dict, List
6
+ from .presets import PRESETS
7
+
8
+
9
class LanguageConfig:
    """
    Represents the configuration of a language, including its phonemes, patterns, and stress rules.

    Attributes:
        phonemes (Dict[str, List[str]]): A dictionary mapping categories to phoneme lists.
        patterns (List[str]): A list of word patterns.
        stress (List[int]): A list of stress positions.
    """

    def __init__(self, phonemes: Dict[str, List[str]], patterns: List[str], stress: List[int]):
        self.phonemes = phonemes
        self.patterns = patterns
        self.stress = stress

    @staticmethod
    def from_str(config_str: str) -> 'LanguageConfig':
        """
        Parses a configuration string to create a LanguageConfig instance.

        Blank lines are ignored. Every other line is classified as:
          * ``KEY: p1 p2 ...`` (contains a colon) - a phoneme category;
          * whitespace-separated integers such as ``-1 -2`` - stress positions;
          * an upper-case line such as ``CV CVC`` - word patterns.

        Args:
            config_str (str): The configuration as a multi-line string.

        Returns:
            LanguageConfig: The parsed language configuration.

        Raises:
            ValueError: If a line matches none of the forms above.
        """
        phonemes = {}
        patterns = []
        stress = []

        for raw_line in config_str.splitlines():
            line = raw_line.strip()
            if not line:
                continue
            if ':' in line:
                # Split on the first colon only, so a further ':' among the
                # values does not break the two-way unpacking.
                key, _, values = line.partition(':')
                phonemes[key.strip()] = values.split()
            elif line.replace('-', '').replace(' ', '').isdigit():
                # The check above also lets through malformed tokens such as
                # "1-2"; report those as an invalid line rather than leaking
                # int()'s "invalid literal" message.
                try:
                    positions = [int(token) for token in line.split()]
                except ValueError as err:
                    raise ValueError(f'Invalid line in configuration: {line}') from err
                stress.extend(positions)
            elif line.isupper():
                patterns.extend(line.split())
            else:
                raise ValueError(f'Invalid line in configuration: {line}')

        return LanguageConfig(phonemes, patterns, stress)

    @staticmethod
    def from_txt(file_path: str) -> 'LanguageConfig':
        """
        Reads a configuration from a text file to create a LanguageConfig instance.

        Args:
            file_path (str): The path to the configuration file.

        Returns:
            LanguageConfig: The parsed language configuration.

        Raises:
            FileNotFoundError: If ``file_path`` is not an existing regular file.
            ValueError: If the file content is not a valid configuration.
        """
        path = Path(file_path)
        if not path.is_file():
            raise FileNotFoundError(f'File not found: {file_path}')
        with path.open('r', encoding='utf-8') as f:
            return LanguageConfig.from_str(f.read())

    @staticmethod
    def from_dict(config_dict: Dict) -> 'LanguageConfig':
        """
        Creates a LanguageConfig instance from a dictionary.

        Args:
            config_dict (Dict): A dictionary with the keys ``'phonemes'``,
                ``'patterns'`` and ``'stress'``.

        Returns:
            LanguageConfig: The parsed language configuration.

        Raises:
            KeyError: If one of the required keys is missing.
        """
        return LanguageConfig(
            phonemes=config_dict['phonemes'],
            patterns=config_dict['patterns'],
            stress=config_dict['stress']
        )

    @staticmethod
    def from_json(file_path: str) -> 'LanguageConfig':
        """
        Reads a configuration from a JSON file to create a LanguageConfig instance.

        Args:
            file_path (str): The path to the configuration file.

        Returns:
            LanguageConfig: The parsed language configuration.

        Raises:
            FileNotFoundError: If ``file_path`` is not an existing regular file.
        """
        path = Path(file_path)
        if not path.is_file():
            raise FileNotFoundError(f'File not found: {file_path}')
        with path.open('r', encoding='utf-8') as f:
            return LanguageConfig.from_dict(json.load(f))

    @staticmethod
    def random() -> 'LanguageConfig':
        """
        Generates a random LanguageConfig instance using predefined presets.

        Returns:
            LanguageConfig: A randomly selected language configuration. Its
            containers are copies, so modifying the returned configuration
            does not alter the shared PRESETS table.
        """
        preset_key = np.random.choice(list(PRESETS))
        preset = PRESETS[preset_key]
        return LanguageConfig(
            phonemes={category: list(sounds) for category, sounds in preset['phonemes'].items()},
            patterns=list(preset['patterns']),
            stress=list(preset['stress'])
        )

    def __str__(self) -> str:
        """
        Returns a string representation of the configuration: one
        ``KEY: phonemes`` line per category, then the patterns line, then the
        stress line.
        """
        phonemes = '\n'.join(f'{k}: {" ".join(v)}' for k, v in self.phonemes.items())
        patterns = ' '.join(self.patterns)
        stress = ' '.join(map(str, self.stress))
        return f'{phonemes}\n{patterns}\n{stress}'

    def __repr__(self) -> str:
        """
        Returns the same string representation as ``__str__``.
        """
        return self.__str__()
conlang/phonemes.py ADDED
@@ -0,0 +1,53 @@
1
# Phoneme inventories (IPA symbols) used by the package.

# Base consonants: Stops, nasals, trills, flaps, fricatives, approximants, and laterals
BASE_CONSONANTS = [
    'p', 'b', 't', 'd', 'ʈ', 'ɖ', 'c', 'ɟ', 'k', 'g', 'q', 'ɢ', 'ʔ',  # stops
    'm', 'ɱ', 'n', 'ɳ', 'ɲ', 'ŋ', 'ɴ',  # nasals
    'ʙ', 'r', 'ʀ',  # trills
    'ⱱ', 'ɾ', 'ɽ',  # taps/flaps
    'ɸ', 'β', 'f', 'v', 'θ', 'ð', 's', 'z', 'ʃ', 'ʒ', 'ʂ', 'ʐ',  # fricatives
    'ç', 'ʝ', 'x', 'ɣ', 'χ', 'ʁ', 'ħ', 'ʕ', 'h', 'ɦ',
    'ɬ', 'ɮ',  # lateral fricatives
    'ʋ', 'ɹ', 'ɻ', 'j', 'ɰ',  # approximants
    'l', 'ɭ', 'ʎ', 'ʟ',  # laterals
    'w'  # semivowels
]

# Affricates: Stops combined with fricatives
AFFRICATES = ['ts', 'dz', 'tʃ', 'dʒ', 'ʈʂ', 'ɖʐ', 'tɕ', 'dʑ', 'tɬ', 'dɮ']

# Modifiers: every base consonant and affricate with a secondary-articulation mark
ASPIRATED = [f'{c}ʰ' for c in BASE_CONSONANTS + AFFRICATES]
EJECTIVES = [f'{c}ʼ' for c in BASE_CONSONANTS + AFFRICATES]
LABIALIZED = [f'{c}ʷ' for c in BASE_CONSONANTS + AFFRICATES]

# Combined consonants. LABIALIZED is included so that labialized consonants
# such as 'kʷ' or 'χʷ' (used by the bundled presets) are part of the inventory;
# it was previously built but never merged in.
# NOTE(review): doubly-modified consonants (e.g. the labialized ejective 'kʷʼ')
# are still not generated here - confirm whether consumers need them.
CONSONANTS = BASE_CONSONANTS + AFFRICATES + ASPIRATED + EJECTIVES + LABIALIZED

# Base vowels: High, mid, and low, including rounded and unrounded variants
BASE_VOWELS = [
    'i', 'y', 'ɨ', 'ʉ', 'ɯ', 'u',  # high
    'ɪ', 'ʏ', 'ʊ',  # near-high
    'e', 'ø', 'ɘ', 'ɵ', 'ɤ', 'o',  # mid
    'ə',  # mid-central
    'ɛ', 'œ', 'ɜ', 'ɞ', 'ʌ', 'ɔ',  # open-mid
    'æ', 'ɐ',  # near-open
    'a', 'ɶ', 'ä', 'ɑ', 'ɒ'  # open
]

# Long vowels: every base vowel followed by the length mark
LONG_VOWELS = [f'{v}ː' for v in BASE_VOWELS]

# Combined vowels
VOWELS = BASE_VOWELS + LONG_VOWELS

# All phonemes, plus the primary-stress mark as the final entry
PHONEMES = CONSONANTS + VOWELS + ["ˈ"]

# Common phonemes: A subset of frequently used phonemes
COMMON_PHONEMES = [
    'p', 't', 'k', 'm', 'n',
    'b', 'd', 'g',
    's', 'z',
    'l', 'r',
    'i', 'u', 'e', 'o', 'a'
]
conlang/presets.py ADDED
@@ -0,0 +1,207 @@
1
# Built-in language presets, keyed by preset name. Each preset holds:
#   'phonemes' - category letter -> list of phonemes in that category
#   'patterns' - word shapes spelled with the category letters
#   'stress'   - candidate stress positions (syllable indices)
PRESETS = {
    # --- polynesian -------------------------------------------------------
    'polynesian': {
        'phonemes': {
            'C': ['m', 'n', 'ŋ', 'p', 't', 'k', 'h', 'r'],
            'V': ['a', 'e', 'i', 'o', 'u'],
        },
        'patterns': ['CVV', 'CVCV', 'VCV', 'VCVV'],
        'stress': [-2],
    },
    # --- semitic ----------------------------------------------------------
    'semitic': {
        'phonemes': {
            'C': [
                'm', 'n', 't', 'k', 'q', 'ʔ', 'b', 'd', 'g',
                'f', 's', 'ʃ', 'χ', 'h', 'ħ', 'z', 'ʕ',
                'r', 'l', 'j', 'w',
            ],
            'V': ['a', 'i', 'u'],
            'L': ['aː', 'iː', 'uː'],
        },
        'patterns': ['CVC', 'CLC', 'CVCV', 'CLCV', 'CVCVC', 'CLCVC'],
        'stress': [-2],
    },
    # --- sinitic ----------------------------------------------------------
    'sinitic': {
        'phonemes': {
            'C': [
                'm', 'n', 'ɲ', 'ŋ', 'p', 't', 'ts', 'tʃ', 'k', 'ʔ',
                'pʰ', 'tʰ', 'tsʰ', 'tʃʰ', 'kʰ', 'b', 'd', 'dz', 'dʒ', 'g',
                's', 'ʃ', 'x', 'z', 'ʒ', 'ɣ', 'l',
            ],
            'V': ['a', 'e', 'i', 'o', 'u'],
            'G': ['j', 'w'],
            'F': ['m', 'n', 'ŋ', 'p', 't', 'k', 'j', 'w'],
        },
        'patterns': ['CV', 'CGV', 'CVF', 'CGVF'],
        'stress': [-1],
    },
    # --- amazonian --------------------------------------------------------
    'amazonian': {
        'phonemes': {
            'C': ['m', 'n', 'ɲ', 'p', 't', 'k', 'ʔ', 'ʃ', 'h', 'r', 'j', 'w'],
            'V': ['a', 'e', 'i', 'o', 'u', 'ɛ', 'ɔ', 'ɯ'],
        },
        'patterns': ['CV', 'VCV', 'CVCV'],
        'stress': [-1],
    },
    # --- andean -----------------------------------------------------------
    'andean': {
        'phonemes': {
            'C': [
                'm', 'n', 'ɲ', 'p', 't', 'tʃ', 'k', 'q',
                's', 'h', 'r', 'l', 'ʎ', 'j', 'w',
            ],
            'V': ['a', 'i', 'u'],
            'Q': [
                'rm', 'rp', 'rk', 'rq', 'sp', 'sk', 'sq', 'sm',
                'kp', 'kt', 'ks', 'qp', 'qt', 'qs',
            ],
            'F': ['n', 'k', 's', 'r'],
        },
        'patterns': ['VCV', 'CVCV', 'VQV', 'CVQV', 'VCVF', 'CVCVF', 'VQVF', 'CVQVF'],
        'stress': [-2],
    },
    # --- nilotic ----------------------------------------------------------
    'nilotic': {
        'phonemes': {
            'C': [
                'm', 'n', 'ŋ', 'ɲ', 'p', 't', 'c', 'k', 'b', 'd', 'ɟ', 'g',
                's', 'r', 'l', 'j', 'w',
            ],
            'G': ['j', 'w'],
            'V': ['a', 'e', 'i', 'o', 'u', 'ɛ', 'ɔ', 'ʌ'],
        },
        'patterns': ['CVC', 'CGVC'],
        'stress': [-1],
    },
    # --- pacific_coast ----------------------------------------------------
    'pacific_coast': {
        'phonemes': {
            'C': [
                'm', 'n', 't', 'ts', 'tɬ', 'k', 'kʷ', 'q', 'qʷ', 'ʔ',
                'tʼ', 'tsʼ', 'tɬʼ', 'kʼ', 'kʷʼ', 'qʼ', 'qʷʼ',
                's', 'ɬ', 'x', 'xʷ', 'χ', 'χʷ', 'h', 'l', 'j', 'w',
            ],
            'V': ['a', 'e', 'i', 'u', 'ə', 'aː', 'eː', 'iː', 'uː'],
        },
        'patterns': ['CVC'],
        'stress': [-1],
    },
    # --- uralic -----------------------------------------------------------
    'uralic': {
        'phonemes': {
            'C': [
                'm', 'n', 'ɲ', 'ŋ', 'p', 't', 'tɕ', 'tʃ', 'k',
                's', 'ɕ', 'ʃ', 'r', 'l', 'ʎ', 'j', 'w',
            ],
            'Q': [
                'pt', 'ps', 'tk', 'tɕk', 'tʃk', 'kt', 'ktɕ', 'ktʃ', 'ks', 'mp',
                'mt', 'mk', 'nt', 'ŋk', 'lk', 'lm', 'lw', 'rk', 'rm', 'rw',
            ],
            'V': ['a', 'e', 'i', 'o', 'u', 'y', 'ɛ'],
            'F': ['a', 'i'],
        },
        'patterns': ['VCF', 'CVCF'],
        'stress': [-2],
    },
    # --- germanic ---------------------------------------------------------
    'germanic': {
        'phonemes': {
            'C': [
                'm', 'n', 'p', 't', 'k', 'b', 'd', 'f', 'θ', 's', 'h',
                'z', 'r', 'l', 'j', 'w',
            ],
            'Q': [
                'pl', 'kl', 'bl', 'fl', 'sl', 'pr', 'tr', 'kr',
                'br', 'dr', 'fr', 'θr', 'tw', 'kw', 'dw', 'θw',
                'sw', 'hw', 'kn', 'sm', 'sn', 'sp', 'st', 'sk',
            ],
            'F': ['mp', 'nt', 'nk', 'ns', 'zd', 'rt', 'rk', 'rs'],
            'V': ['a', 'e', 'i', 'u'],
            'L': ['aː', 'eː', 'iː', 'uː', 'ɔː'],
            'D': ['aw', 'aj', 'ew', 'iw'],
        },
        'patterns': [
            'CVC', 'QVC', 'CVF', 'CLC', 'QLC', 'CDC', 'QDC',
            'VC', 'VF', 'DC', 'LC',
        ],
        'stress': [-2],
    },
    # --- caucasus ---------------------------------------------------------
    'caucasus': {
        'phonemes': {
            'C': [
                'm', 'n', 'pʼ', 'tʼ', 'tsʼ', 'tʃʼ', 'kʼ', 'qʼ',
                'b', 'd', 'dz', 'dʒ', 'g', 'gʷ',
                's', 'ʃ', 'χ', 'χʷ', 'ħ', 'ħʷ', 'z', 'ʒ', 'ʁ', 'ʁʷ',
                'r', 'l',
            ],
            'V': ['a', 'ə'],
        },
        'patterns': ['CV', 'VC', 'CVC', 'VCV', 'CVCV'],
        'stress': [-2],
    },
    # --- bantu ------------------------------------------------------------
    'bantu': {
        'phonemes': {
            'C': ['m', 'n', 'ɲ', 'p', 't', 'tʃ', 'k', 'b', 'd', 'dʒ', 'g'],
            'Q': ['mp', 'mb', 'nt', 'nd', 'ŋk', 'ŋg', 'ntʃ', 'ndʒ'],
            'V': ['a', 'e', 'i', 'o', 'u'],
        },
        'patterns': ['CV', 'QV', 'VCV', 'VQV', 'CVCV', 'CVQV', 'QVCV'],
        'stress': [-1, -2],
    },
    # --- maya -------------------------------------------------------------
    'maya': {
        'phonemes': {
            'C': [
                'm', 'n', 'p', 't', 'ts', 'tʃ', 'k', 'ʔ', 'b',
                'pʼ', 'tʼ', 'tsʼ', 'tʃʼ', 'kʼ',
                's', 'ʃ', 'χ', 'h', 'l', 'j', 'w',
            ],
            'F': [
                'm', 'n', 'ts', 'tʃ', 'k', 'ʔ', 'b',
                'tsʼ', 'tʃʼ', 'kʼ', 'ʃ', 'h', 'l',
            ],
            'V': ['a', 'e', 'i', 'o', 'u', 'aː', 'eː', 'iː', 'oː', 'uː'],
        },
        'patterns': ['CVF'],
        'stress': [-1],
    },
    # --- caddoan ----------------------------------------------------------
    'caddoan': {
        'phonemes': {
            'C': ['n', 'p', 't', 'tʃ', 'k', 'ʔ', 's', 'ʃ', 'x', 'h', 'r', 'w'],
            'F': ['t', 'k', 'ʔ'],
            'V': ['a', 'e', 'i', 'o', 'u', 'aː', 'eː', 'iː', 'oː', 'uː'],
        },
        'patterns': ['CVCV', 'CVCVF'],
        'stress': [-1, -2],
    },
}