preling 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. preling-1.0.0/LICENSE +21 -0
  2. preling-1.0.0/PKG-INFO +75 -0
  3. preling-1.0.0/README.md +52 -0
  4. preling-1.0.0/pyproject.toml +6 -0
  5. preling-1.0.0/setup.cfg +41 -0
  6. preling-1.0.0/src/preling/__init__.py +0 -0
  7. preling-1.0.0/src/preling/app/__init__.py +6 -0
  8. preling-1.0.0/src/preling/app/app.py +7 -0
  9. preling-1.0.0/src/preling/app/commands/__init__.py +13 -0
  10. preling-1.0.0/src/preling/app/commands/delete.py +40 -0
  11. preling-1.0.0/src/preling/app/commands/init.py +123 -0
  12. preling-1.0.0/src/preling/app/commands/path.py +26 -0
  13. preling-1.0.0/src/preling/app/commands/stats.py +123 -0
  14. preling-1.0.0/src/preling/app/commands/study/__init__.py +201 -0
  15. preling-1.0.0/src/preling/app/commands/study/chooser.py +79 -0
  16. preling-1.0.0/src/preling/app/commands/study/evaluator.py +137 -0
  17. preling-1.0.0/src/preling/app/commands/study/interaction.py +71 -0
  18. preling-1.0.0/src/preling/app/commands/study/tts.py +41 -0
  19. preling-1.0.0/src/preling/app/commands/study/updater.py +30 -0
  20. preling-1.0.0/src/preling/db/__init__.py +54 -0
  21. preling-1.0.0/src/preling/db/base.py +9 -0
  22. preling-1.0.0/src/preling/db/models.py +82 -0
  23. preling-1.0.0/src/preling/preling.py +4 -0
  24. preling-1.0.0/src/preling/utils/__init__.py +0 -0
  25. preling-1.0.0/src/preling/utils/console.py +7 -0
  26. preling-1.0.0/src/preling/utils/paths.py +31 -0
  27. preling-1.0.0/src/preling/utils/time.py +9 -0
  28. preling-1.0.0/src/preling/utils/typer.py +13 -0
  29. preling-1.0.0/src/preling.egg-info/PKG-INFO +75 -0
  30. preling-1.0.0/src/preling.egg-info/SOURCES.txt +33 -0
  31. preling-1.0.0/src/preling.egg-info/dependency_links.txt +1 -0
  32. preling-1.0.0/src/preling.egg-info/entry_points.txt +2 -0
  33. preling-1.0.0/src/preling.egg-info/requires.txt +7 -0
  34. preling-1.0.0/src/preling.egg-info/top_level.txt +1 -0
preling-1.0.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Danylo Mysak
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
preling-1.0.0/PKG-INFO ADDED
@@ -0,0 +1,75 @@
1
+ Metadata-Version: 2.4
2
+ Name: preling
3
+ Version: 1.0.0
4
+ Summary: Command-line tool for learning foreign languages through gradual exposure to new vocabulary
5
+ Home-page: https://github.com/danmysak/preling
6
+ Author: Danylo Mysak
7
+ Author-email: danmysak@gmail.com
8
+ Project-URL: Bug Tracker, https://github.com/danmysak/preling/issues
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Requires-Python: >=3.12
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE
15
+ Requires-Dist: openai[voice_helpers]
16
+ Requires-Dist: prompt_toolkit
17
+ Requires-Dist: pyaudio
18
+ Requires-Dist: spacy
19
+ Requires-Dist: SQLAlchemy
20
+ Requires-Dist: tqdm
21
+ Requires-Dist: typer
22
+ Dynamic: license-file
23
+
24
+ # PreLing
25
+
26
+ PreLing is a command-line tool for improving language-comprehension skills through gradual exposure to new vocabulary. It supports every language that [spaCy supports](https://spacy.io/usage/models#languages). Because PreLing uses GPT under the hood, you must have a paid [OpenAI account](https://platform.openai.com/) and an [API key](https://platform.openai.com/api-keys) to run it.
27
+
28
+ ## Installation
29
+
30
+ Install [Python](https://www.python.org/downloads/) **3.12 or later** and [pipx](https://pipx.pypa.io/stable/installation/), then run:
31
+
32
+ ```bash
33
+ pipx install preling # install
34
+ pipx upgrade preling # upgrade
35
+ pipx uninstall preling # uninstall (hopefully you won't need this)
36
+ ```
37
+
38
+ ## Initialize a New Language
39
+
40
+ Prepare a plain-text file that contains **one sentence per line** in the language you want to learn. For example, you can download a monolingual corpus from [OPUS](https://opus.nlpl.eu/). Then run:
41
+
42
+ ```bash
43
+ preling init <lang> <corpus>
44
+ ```
45
+
46
+ `<lang>` is the [language code](https://spacy.io/usage/models#languages), and `<corpus>` is the path to the corpus file.
47
+
48
+ ## Study a Language
49
+
50
+ ```bash
51
+ preling study <lang> [--audio] [--audio-only] [--model <GPT_MODEL>] \
52
+ [--tts-model <TTS_MODEL>] [--api-key <OPENAI_KEY>]
53
+ ```
54
+
55
+ * **`<lang>`** – the language code you initialized earlier.
56
+ * **`--audio`** – play audio along with the text.
57
+ * **`--audio-only`** – play audio without displaying the text.
58
+ * **`--model`** – the GPT model to use for grammar evaluation.
59
+ * **`--tts-model`** – the text-to-speech model to use for audio playback.
60
+ * **`--api-key`** – your OpenAI API key.
61
+
62
+ Instead of passing these options each time, you can set the environment variables `PRELING_API_KEY`, `PRELING_MODEL`, and `PRELING_TTS_MODEL`.
63
+
64
+ ## View Your Progress
65
+
66
+ ```bash
67
+ preling stats <lang>
68
+ ```
69
+
70
+ ## Other Commands
71
+
72
+ ```bash
73
+ preling path <lang> # show the path to the language-data file
74
+ preling delete <lang> [--force] # delete the language-data file; use --force to skip the confirmation prompt
75
+ ```
@@ -0,0 +1,52 @@
1
+ # PreLing
2
+
3
+ PreLing is a command-line tool for improving language-comprehension skills through gradual exposure to new vocabulary. It supports every language that [spaCy supports](https://spacy.io/usage/models#languages). Because PreLing uses GPT under the hood, you must have a paid [OpenAI account](https://platform.openai.com/) and an [API key](https://platform.openai.com/api-keys) to run it.
4
+
5
+ ## Installation
6
+
7
+ Install [Python](https://www.python.org/downloads/) **3.12 or later** and [pipx](https://pipx.pypa.io/stable/installation/), then run:
8
+
9
+ ```bash
10
+ pipx install preling # install
11
+ pipx upgrade preling # upgrade
12
+ pipx uninstall preling # uninstall (hopefully you won't need this)
13
+ ```
14
+
15
+ ## Initialize a New Language
16
+
17
+ Prepare a plain-text file that contains **one sentence per line** in the language you want to learn. For example, you can download a monolingual corpus from [OPUS](https://opus.nlpl.eu/). Then run:
18
+
19
+ ```bash
20
+ preling init <lang> <corpus>
21
+ ```
22
+
23
+ `<lang>` is the [language code](https://spacy.io/usage/models#languages), and `<corpus>` is the path to the corpus file.
24
+
25
+ ## Study a Language
26
+
27
+ ```bash
28
+ preling study <lang> [--audio] [--audio-only] [--model <GPT_MODEL>] \
29
+ [--tts-model <TTS_MODEL>] [--api-key <OPENAI_KEY>]
30
+ ```
31
+
32
+ * **`<lang>`** – the language code you initialized earlier.
33
+ * **`--audio`** – play audio along with the text.
34
+ * **`--audio-only`** – play audio without displaying the text.
35
+ * **`--model`** – the GPT model to use for grammar evaluation.
36
+ * **`--tts-model`** – the text-to-speech model to use for audio playback.
37
+ * **`--api-key`** – your OpenAI API key.
38
+
39
+ Instead of passing these options each time, you can set the environment variables `PRELING_API_KEY`, `PRELING_MODEL`, and `PRELING_TTS_MODEL`.
40
+
41
+ ## View Your Progress
42
+
43
+ ```bash
44
+ preling stats <lang>
45
+ ```
46
+
47
+ ## Other Commands
48
+
49
+ ```bash
50
+ preling path <lang> # show the path to the language-data file
51
+ preling delete <lang> [--force] # delete the language-data file; use --force to skip the confirmation prompt
52
+ ```
@@ -0,0 +1,6 @@
1
+ [build-system]
2
+ requires = [
3
+ "setuptools>=42",
4
+ "wheel"
5
+ ]
6
+ build-backend = "setuptools.build_meta"
@@ -0,0 +1,41 @@
1
+ [metadata]
2
+ name = preling
3
+ version = 1.0.0
4
+ author = Danylo Mysak
5
+ author_email = danmysak@gmail.com
6
+ description = Command-line tool for learning foreign languages through gradual exposure to new vocabulary
7
+ long_description = file: README.md
8
+ long_description_content_type = text/markdown
9
+ url = https://github.com/danmysak/preling
10
+ project_urls =
11
+ Bug Tracker = https://github.com/danmysak/preling/issues
12
+ classifiers =
13
+ Programming Language :: Python :: 3
14
+ License :: OSI Approved :: MIT License
15
+ Operating System :: OS Independent
16
+
17
+ [options]
18
+ package_dir =
19
+ =src
20
+ packages = find:
21
+ python_requires = >=3.12
22
+ install_requires =
23
+ openai[voice_helpers]
24
+ prompt_toolkit
25
+ pyaudio
26
+ spacy
27
+ SQLAlchemy
28
+ tqdm
29
+ typer
30
+
31
+ [options.entry_points]
32
+ console_scripts =
33
+ preling = preling.preling:app
34
+
35
+ [options.packages.find]
36
+ where = src
37
+
38
+ [egg_info]
39
+ tag_build =
40
+ tag_date = 0
41
+
File without changes
@@ -0,0 +1,6 @@
1
+ from . import commands # Register commands with Typer
2
+ from .app import app
3
+
4
+ __all__ = [
5
+ 'app',
6
+ ]
@@ -0,0 +1,7 @@
1
+ import typer
2
+
3
+ __all__ = [
4
+ 'app',
5
+ ]
6
+
7
# Single Typer application object for the package; the command modules attach
# their commands to it with `@app.command()`. Typer's pretty exception
# rendering is switched off via `pretty_exceptions_enable=False`.
app = typer.Typer(pretty_exceptions_enable=False)
@@ -0,0 +1,13 @@
1
+ from .delete import delete
2
+ from .init import init
3
+ from .path import path
4
+ from .stats import stats
5
+ from .study import study
6
+
7
+ __all__ = [
8
+ 'delete',
9
+ 'init',
10
+ 'path',
11
+ 'stats',
12
+ 'study',
13
+ ]
@@ -0,0 +1,40 @@
1
+ from typing import Annotated
2
+
3
+ from typer import Argument, Option
4
+
5
+ from preling.app.app import app
6
+ from preling.db import get_path
7
+ from preling.utils.typer import typer_raise
8
+
9
+ __all__ = [
10
+ 'delete',
11
+ ]
12
+
13
+
14
@app.command()
def delete(
    language: Annotated[
        str,
        Argument(help='Language code whose data should be removed.'),
    ],
    force: Annotated[
        bool | None,
        Option(
            '--force',
            '-f',
            help='Skip the confirmation prompt and delete immediately.',
        ),
    ] = False,
) -> None:
    """Delete all stored data for `language`."""
    # The docstring is left untouched: Typer presents it as the command help.
    data_file = get_path(language)
    if not data_file.exists():
        typer_raise(f'Language "{language}" is not initialized.')

    # Without --force, anything that does not start with "y"/"Y" aborts.
    if not force:
        answer = input(
            f'Are you sure you want to delete all data for "{language}"? (y/N): ',
        )
        confirmed = answer.lower().startswith('y')
        if not confirmed:
            typer_raise('Operation canceled.')

    # missing_ok covers the file disappearing between the check and the unlink.
    data_file.unlink(missing_ok=True)
    print(f'Deleted all data for "{language}".')
@@ -0,0 +1,123 @@
1
+ from __future__ import annotations
2
+ from collections import Counter
3
+ from pathlib import Path
4
+ from typing import Annotated, Generator, TYPE_CHECKING
5
+
6
+ if TYPE_CHECKING:
7
+ from spacy.language import Language
8
+ from tqdm import tqdm
9
+ from typer import Argument
10
+
11
+ from preling.app.app import app
12
+ from preling.db import get_session, Session
13
+ from preling.db.models import Sentence, SentenceWord, Word
14
+ from preling.utils.typer import typer_raise
15
+
16
+ __all__ = [
17
+ 'init',
18
+ ]
19
+
20
+
21
def get_nlp(language: str) -> Language:
    """Build a blank spaCy pipeline for the given language code.

    spaCy is imported inside the function rather than at module level. If
    `spacy.blank` raises `ImportError` for the code, the failure is reported
    through `typer_raise`.
    """
    import spacy

    try:
        pipeline = spacy.blank(language)
    except ImportError:
        typer_raise(f'Language "{language}" is not supported by spaCy.')
    else:
        return pipeline
28
+
29
+
30
def get_sentences(corpus: Path) -> Generator[str, None, None]:
    """Yield the non-empty, whitespace-stripped lines of the corpus file.

    Args:
        corpus: Path to a UTF-8 text file with one sentence per line.

    Yields:
        Each line with surrounding whitespace removed; lines that are empty
        after stripping are skipped.
    """
    # 'utf-8-sig' decodes plain UTF-8 as well, but additionally drops a leading
    # byte-order mark. With plain 'utf-8' the BOM stays glued to the first
    # sentence, because str.strip() does not treat U+FEFF as whitespace.
    with corpus.open('r', encoding='utf-8-sig') as file:
        for line in file:
            if sentence := line.strip():
                yield sentence
36
+
37
+
38
def extract_words(nlp: Language, sentence: str) -> list[str]:
    """Return the lowercase form of every alphabetic token in `sentence`.

    The sentence is tokenized by calling `nlp` on it; tokens whose `is_alpha`
    flag is false are dropped, and order is preserved.
    """
    doc = nlp(sentence)
    return [tok.lower_ for tok in doc if tok.is_alpha]
41
+
42
+
43
def process_corpus(language: str, corpus: Path) -> tuple[dict[str, list[str]], Counter[str]]:
    """Tokenize the corpus and collect per-sentence words and word counts.

    Returns a pair: a mapping from each distinct sentence to its extracted
    words, and a counter of how often each word occurs across those sentences.
    Repeated sentences and sentences that yield no words are left out of both.
    """
    pipeline = get_nlp(language)

    sentence_words: dict[str, list[str]] = {}
    counts: Counter[str] = Counter()

    for text in tqdm(get_sentences(corpus), desc='Processing sentences'):
        # A sentence that was already recorded is not tokenized again.
        if text in sentence_words:
            continue
        tokens = extract_words(pipeline, text)
        if not tokens:
            continue
        sentence_words[text] = tokens
        counts.update(tokens)

    return sentence_words, counts
56
+
57
+
58
def add_words(session: Session, frequencies: Counter[str]) -> dict[str, int]:
    """Insert one `Word` row per counted word and return a word-to-id mapping.

    Words are inserted from most to least frequent. Each new row starts with
    `streak_start` and `due` set to None.
    """
    word_ids: dict[str, int] = {}
    ranked = frequencies.most_common()
    for text, count in tqdm(ranked, desc='Adding words'):
        record = Word(word=text, occurrences=count, streak_start=None, due=None)
        session.add(record)
        # Flush before reading `record.id`, which is stored in the mapping.
        session.flush()
        word_ids[text] = record.id
    return word_ids
72
+
73
+
74
def add_sentences(session: Session, words_by_sentence: dict[str, list[str]], ids_by_word: dict[str, int]) -> None:
    """Insert every sentence together with its ordered word links.

    For each sentence a `Sentence` row is created with both attempt counters
    at zero, followed by one `SentenceWord` row per word carrying the word's
    position in the sentence and its id looked up in `ids_by_word`.
    """
    for text, tokens in tqdm(words_by_sentence.items(), desc='Adding sentences'):
        row = Sentence(sentence=text, correct_attempts=0, incorrect_attempts=0)
        session.add(row)
        # Flush before reading `row.id`, which the link rows below refer to.
        session.flush()
        for position, token in enumerate(tokens):
            link = SentenceWord(
                sentence_id=row.id,
                word_index=position,
                word_id=ids_by_word[token],
            )
            session.add(link)
    # Push out the link rows that are still pending after the last sentence.
    session.flush()
91
+
92
+
93
@app.command()
def init(
    language: Annotated[
        str,
        Argument(help='Language code supported by spaCy (e.g., "en", "fr", "uk").'),
    ],
    corpus: Annotated[
        Path,
        Argument(
            dir_okay=False,
            exists=True,
            readable=True,
            resolve_path=True,
            help='Plain‑text file containing one sentence per line.',
        ),
    ],
) -> None:
    """Initialise PreLing for a new language."""
    # The docstring is left untouched: Typer presents it as the command help.
    with get_session(language) as session:
        # Any stored sentence means this language has been set up before.
        existing = session.query(Sentence).first()
        if existing:
            typer_raise(f'PreLing is already initialized for language "{language}".')

        words_by_sentence, word_frequencies = process_corpus(language, corpus)
        if not word_frequencies:
            typer_raise('No valid sentences found in the corpus.')

        # Words go in first so the sentence links can refer to their ids.
        ids_by_word = add_words(session, word_frequencies)
        add_sentences(session, words_by_sentence, ids_by_word)

        print('Committing changes to the database...')
        session.commit()

        sentence_count = len(words_by_sentence)
        word_count = len(word_frequencies)
        print(f'Initialized PreLing for language "{language}" '
              f'with {sentence_count} unique sentences '
              f'and {word_count} unique words.')
@@ -0,0 +1,26 @@
1
+ from typing import Annotated
2
+
3
+ from typer import Argument
4
+
5
+ from preling.app.app import app
6
+ from preling.db import get_path
7
+ from preling.utils.typer import typer_raise
8
+
9
+ __all__ = [
10
+ 'path',
11
+ ]
12
+
13
+
14
@app.command()
def path(
    language: Annotated[
        str,
        Argument(help='Language code whose data file should be printed.'),
    ],
) -> None:
    """Print the absolute path to PreLing’s data file for `language`."""
    # The docstring is left untouched: Typer presents it as the command help.
    data_file = get_path(language)
    if not data_file.exists():
        typer_raise(f'Language "{language}" is not initialized.')
    else:
        print(data_file.absolute())
@@ -0,0 +1,123 @@
1
+ from __future__ import annotations
2
+ from typing import Annotated
3
+
4
+ from dataclasses import dataclass, field
5
+ from rich.console import Console
6
+ from rich.text import Text
7
+ from tqdm import tqdm
8
+ from typer import Argument
9
+
10
+ from preling.app.app import app
11
+ from preling.db import get_session, Session
12
+ from preling.db.models import Sentence, Word
13
+ from preling.utils.time import get_timestamp
14
+
15
+ __all__ = [
16
+ 'stats',
17
+ ]
18
+
19
+
20
@dataclass
class WordStatsItem:
    """Running tally for one group of words."""

    count: int = 0  # number of words added so far
    occurrences: int = 0  # sum of the `occurrences` of the added words

    def add_word(self, word: Word) -> None:
        """Count `word` and add its occurrences to the running sum."""
        self.count = self.count + 1
        self.occurrences = self.occurrences + word.occurrences

    def format(self, total: int) -> Text:
        """Render the count followed by a dimmed coverage percentage.

        Coverage is this item's occurrences divided by `total`; a `total`
        below 1 is treated as 1 so the division cannot fail on zero.
        """
        share = self.occurrences / max(total, 1)
        headline = Text(f'{self.count} ')
        detail = Text(f'(coverage: {share:.1%})', style='dim')
        return headline + detail
33
+
34
+
35
@dataclass
class WordStats:
    """Word tallies for one language, as filled in by `compute_word_stats`."""

    # Words whose `due` value lies after the timestamp taken for the report.
    active: WordStatsItem = field(default_factory=WordStatsItem)
    # Words whose `due` value is set at all.
    seen: WordStatsItem = field(default_factory=WordStatsItem)
    # Every word stored for the language.
    total: WordStatsItem = field(default_factory=WordStatsItem)
40
+
41
+
42
@dataclass
class SentenceStats:
    """Sentence counters for one language, as filled in by `compute_sentence_stats`."""

    # Sentences with at least one correct attempt.
    correct: int = 0
    # Sentences with at least one attempt, correct or incorrect.
    seen: int = 0
    # Every sentence stored for the language.
    total_sentences: int = 0
    # Correct plus incorrect attempts, summed over all sentences.
    total_attempts: int = 0
48
+
49
+
50
def compute_word_stats(session: Session) -> WordStats:
    """Tally all stored words into total, seen and active groups.

    Every word counts towards `total`. A word with a `due` value also counts
    as `seen`, and additionally as `active` when that value is later than the
    timestamp taken at the start of the call.
    """
    tally = WordStats()
    cutoff = get_timestamp()
    entry: Word
    for entry in tqdm(session.query(Word).all(), desc='Computing word statistics', leave=False):
        tally.total.add_word(entry)
        if entry.due is None:
            continue
        tally.seen.add_word(entry)
        if entry.due > cutoff:
            tally.active.add_word(entry)
    return tally
62
+
63
+
64
def compute_sentence_stats(session: Session) -> SentenceStats:
    """Tally all stored sentences by attempt history.

    Counts the sentences answered correctly at least once, the sentences
    attempted at all, the number of sentences, and the sum of all attempts.
    """
    tally = SentenceStats()
    for row in tqdm(session.query(Sentence).all(), desc='Computing sentence statistics', leave=False):
        right = row.correct_attempts
        wrong = row.incorrect_attempts
        if right:
            tally.correct += 1
        if wrong or right:
            tally.seen += 1
        tally.total_sentences += 1
        tally.total_attempts += right + wrong
    return tally
75
+
76
+
77
def format_section_title(title: str) -> Text:
    """Wrap `title` in a bold, underlined rich `Text`."""
    heading = Text(title, style='bold underline')
    return heading
80
+
81
+
82
def format_stats_label(title: str) -> Text:
    """Wrap `title`, followed by a colon, in a bold rich `Text`."""
    label = f'{title}:'
    return Text(label, style='bold')
85
+
86
+
87
def print_word_stats(console: Console, word_stats: WordStats) -> None:
    """Print the word section: a title, then one line per word group.

    Each group's coverage is computed against the occurrences of all words.
    """
    coverage_base = word_stats.total.occurrences
    console.print(format_section_title('Word Statistics'))
    rows = (
        ('In retention', word_stats.active),
        ('Seen words', word_stats.seen),
        ('Total words', word_stats.total),
    )
    for label, item in rows:
        console.print(format_stats_label(label), item.format(coverage_base))
94
+
95
+
96
def print_sentence_stats(console: Console, sentence_stats: SentenceStats) -> None:
    """Print the sentence section: a title, then the three sentence counters."""
    console.print(format_section_title('Sentence Statistics'))

    correct_label = format_stats_label('Correct at least once')
    console.print(correct_label, sentence_stats.correct)

    # The seen count is followed by a dimmed note giving the overall total.
    seen_label = format_stats_label('Seen sentences')
    out_of = Text(f'(out of {sentence_stats.total_sentences})', style='dim')
    console.print(seen_label, sentence_stats.seen, out_of)

    attempts_label = format_stats_label('Total attempts')
    console.print(attempts_label, sentence_stats.total_attempts)
106
+
107
+
108
@app.command()
def stats(
    language: Annotated[
        str,
        Argument(help='Language code to show study statistics for.'),
    ],
) -> None:
    """Display study statistics for the given language."""
    # The docstring is left untouched: Typer presents it as the command help.
    # Imported here because this module's top-level imports do not include them.
    from preling.db import get_path
    from preling.utils.typer import typer_raise

    # Same guard as the `path` and `delete` commands. Without it an unknown
    # language code is passed straight to get_session(), which `init` also
    # calls for languages that have no data yet, so the command would report
    # all-zero statistics instead of an error.
    # NOTE(review): get_session() presumably creates the data file as a side
    # effect in that case - confirm against preling.db.
    if not get_path(language).exists():
        typer_raise(f'Language "{language}" is not initialized.')

    with get_session(language) as session:
        word_stats = compute_word_stats(session)
        sentence_stats = compute_sentence_stats(session)

    # highlight=False is passed so rich does not apply its automatic highlighting.
    console = Console(highlight=False)
    print_word_stats(console, word_stats)
    console.print()
    print_sentence_stats(console, sentence_stats)