PyPI - preling - Versions diffs - 1.0.0__tar.gz - Mend

preling 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

preling-1.0.0/LICENSE +21 -0
preling-1.0.0/PKG-INFO +75 -0
preling-1.0.0/README.md +52 -0
preling-1.0.0/pyproject.toml +6 -0
preling-1.0.0/setup.cfg +41 -0
preling-1.0.0/src/preling/__init__.py +0 -0
preling-1.0.0/src/preling/app/__init__.py +6 -0
preling-1.0.0/src/preling/app/app.py +7 -0
preling-1.0.0/src/preling/app/commands/__init__.py +13 -0
preling-1.0.0/src/preling/app/commands/delete.py +40 -0
preling-1.0.0/src/preling/app/commands/init.py +123 -0
preling-1.0.0/src/preling/app/commands/path.py +26 -0
preling-1.0.0/src/preling/app/commands/stats.py +123 -0
preling-1.0.0/src/preling/app/commands/study/__init__.py +201 -0
preling-1.0.0/src/preling/app/commands/study/chooser.py +79 -0
preling-1.0.0/src/preling/app/commands/study/evaluator.py +137 -0
preling-1.0.0/src/preling/app/commands/study/interaction.py +71 -0
preling-1.0.0/src/preling/app/commands/study/tts.py +41 -0
preling-1.0.0/src/preling/app/commands/study/updater.py +30 -0
preling-1.0.0/src/preling/db/__init__.py +54 -0
preling-1.0.0/src/preling/db/base.py +9 -0
preling-1.0.0/src/preling/db/models.py +82 -0
preling-1.0.0/src/preling/preling.py +4 -0
preling-1.0.0/src/preling/utils/__init__.py +0 -0
preling-1.0.0/src/preling/utils/console.py +7 -0
preling-1.0.0/src/preling/utils/paths.py +31 -0
preling-1.0.0/src/preling/utils/time.py +9 -0
preling-1.0.0/src/preling/utils/typer.py +13 -0
preling-1.0.0/src/preling.egg-info/PKG-INFO +75 -0
preling-1.0.0/src/preling.egg-info/SOURCES.txt +33 -0
preling-1.0.0/src/preling.egg-info/dependency_links.txt +1 -0
preling-1.0.0/src/preling.egg-info/entry_points.txt +2 -0
preling-1.0.0/src/preling.egg-info/requires.txt +7 -0
preling-1.0.0/src/preling.egg-info/top_level.txt +1 -0

preling-1.0.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2025 Danylo Mysak
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

preling-1.0.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,75 @@
+Metadata-Version: 2.4
+Name: preling
+Version: 1.0.0
+Summary: Command-line tool for learning foreign languages through gradual exposure to new vocabulary
+Home-page: https://github.com/danmysak/preling
+Author: Danylo Mysak
+Author-email: danmysak@gmail.com
+Project-URL: Bug Tracker, https://github.com/danmysak/preling/issues
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.12
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: openai[voice_helpers]
+Requires-Dist: prompt_toolkit
+Requires-Dist: pyaudio
+Requires-Dist: spacy
+Requires-Dist: SQLAlchemy
+Requires-Dist: tqdm
+Requires-Dist: typer
+Dynamic: license-file
+# PreLing
+PreLing is a command-line tool for improving language-comprehension skills through gradual exposure to new vocabulary. It supports every language that [spaCy supports](https://spacy.io/usage/models#languages). Because PreLing uses GPT under the hood, you must have a paid [OpenAI account](https://platform.openai.com/) and an [API key](https://platform.openai.com/api-keys) to run it.
+## Installation
+Install [Python](https://www.python.org/downloads/) **3.12 or later** and [pipx](https://pipx.pypa.io/stable/installation/), then run:
+```bash
+pipx install preling          # install
+pipx upgrade preling          # upgrade
+pipx uninstall preling        # uninstall (hopefully you won't need this)
+```
+## Initialize a New Language
+Prepare a plain-text file that contains **one sentence per line** in the language you want to learn. For example, you can download a monolingual corpus from [OPUS](https://opus.nlpl.eu/). Then run:
+```bash
+preling init <lang> <corpus>
+```
+`<lang>` is the [language code](https://spacy.io/usage/models#languages), and `<corpus>` is the path to the corpus file.
+## Study a Language
+```bash
+preling study <lang> [--audio] [--audio-only] [--model <GPT_MODEL>] \
+               [--tts-model <TTS_MODEL>] [--api-key <OPENAI_KEY>]
+```
+* **`<lang>`** – the language code you initialized earlier.
+* **`--audio`** – play audio along with the text.
+* **`--audio-only`** – play audio without displaying the text.
+* **`--model`** – the GPT model to use for grammar evaluation.
+* **`--tts-model`** – the text-to-speech model to use for audio playback.
+* **`--api-key`** – your OpenAI API key.
+Instead of passing these options each time, you can set the environment variables `PRELING_API_KEY`, `PRELING_MODEL`, and `PRELING_TTS_MODEL`.
+## View Your Progress
+```bash
+preling stats <lang>
+```
+## Other Commands
+```bash
+preling path <lang>               # show the path to the language-data file
+preling delete <lang> [--force]   # delete the language-data file; use --force to skip the confirmation prompt
+```

preling-1.0.0/README.md ADDED Viewed

@@ -0,0 +1,52 @@
+# PreLing
+PreLing is a command-line tool for improving language-comprehension skills through gradual exposure to new vocabulary. It supports every language that [spaCy supports](https://spacy.io/usage/models#languages). Because PreLing uses GPT under the hood, you must have a paid [OpenAI account](https://platform.openai.com/) and an [API key](https://platform.openai.com/api-keys) to run it.
+## Installation
+Install [Python](https://www.python.org/downloads/) **3.12 or later** and [pipx](https://pipx.pypa.io/stable/installation/), then run:
+```bash
+pipx install preling          # install
+pipx upgrade preling          # upgrade
+pipx uninstall preling        # uninstall (hopefully you won't need this)
+```
+## Initialize a New Language
+Prepare a plain-text file that contains **one sentence per line** in the language you want to learn. For example, you can download a monolingual corpus from [OPUS](https://opus.nlpl.eu/). Then run:
+```bash
+preling init <lang> <corpus>
+```
+`<lang>` is the [language code](https://spacy.io/usage/models#languages), and `<corpus>` is the path to the corpus file.
+## Study a Language
+```bash
+preling study <lang> [--audio] [--audio-only] [--model <GPT_MODEL>] \
+               [--tts-model <TTS_MODEL>] [--api-key <OPENAI_KEY>]
+```
+* **`<lang>`** – the language code you initialized earlier.
+* **`--audio`** – play audio along with the text.
+* **`--audio-only`** – play audio without displaying the text.
+* **`--model`** – the GPT model to use for grammar evaluation.
+* **`--tts-model`** – the text-to-speech model to use for audio playback.
+* **`--api-key`** – your OpenAI API key.
+Instead of passing these options each time, you can set the environment variables `PRELING_API_KEY`, `PRELING_MODEL`, and `PRELING_TTS_MODEL`.
+## View Your Progress
+```bash
+preling stats <lang>
+```
+## Other Commands
+```bash
+preling path <lang>               # show the path to the language-data file
+preling delete <lang> [--force]   # delete the language-data file; use --force to skip the confirmation prompt
+```

preling-1.0.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,6 @@
+[build-system]
+requires = [
+    "setuptools>=42",
+    "wheel"
+]
+build-backend = "setuptools.build_meta"

preling-1.0.0/setup.cfg ADDED Viewed

@@ -0,0 +1,41 @@
+[metadata]
+name = preling
+version = 1.0.0
+author = Danylo Mysak
+author_email = danmysak@gmail.com
+description = Command-line tool for learning foreign languages through gradual exposure to new vocabulary
+long_description = file: README.md
+long_description_content_type = text/markdown
+url = https://github.com/danmysak/preling
+project_urls =
+	Bug Tracker = https://github.com/danmysak/preling/issues
+classifiers =
+	Programming Language :: Python :: 3
+	License :: OSI Approved :: MIT License
+	Operating System :: OS Independent
+[options]
+package_dir =
+	=src
+packages = find:
+python_requires = >=3.12
+install_requires =
+	openai[voice_helpers]
+	prompt_toolkit
+	pyaudio
+	spacy
+	SQLAlchemy
+	tqdm
+	typer
+[options.entry_points]
+console_scripts =
+	preling = preling.preling:app
+[options.packages.find]
+where = src
+[egg_info]
+tag_build =
+tag_date = 0

preling-1.0.0/src/preling/__init__.py ADDED Viewed

File without changes

preling-1.0.0/src/preling/app/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+from . import commands  # Register commands with Typer
+from .app import app
+__all__ = [
+    'app',
+]

preling-1.0.0/src/preling/app/app.py ADDED Viewed

@@ -0,0 +1,7 @@
+import typer
+# Public names of this module: only the shared Typer application object.
+__all__ = [
+    'app',
+]
+# Single Typer application instance. The command modules import it and register
+# themselves on it with `@app.command()`.
+# NOTE(review): `pretty_exceptions_enable=False` presumably switches off Typer's
+# rich-formatted tracebacks in favour of plain Python ones — confirm in the Typer docs.
+app = typer.Typer(pretty_exceptions_enable=False)

preling-1.0.0/src/preling/app/commands/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+from .delete import delete
+from .init import init
+from .path import path
+from .stats import stats
+from .study import study
+__all__ = [
+    'delete',
+    'init',
+    'path',
+    'stats',
+    'study',
+]

preling-1.0.0/src/preling/app/commands/delete.py ADDED Viewed

@@ -0,0 +1,40 @@
+from typing import Annotated
+from typer import Argument, Option
+from preling.app.app import app
+from preling.db import get_path
+from preling.utils.typer import typer_raise
+__all__ = [
+    'delete',
+]
+@app.command()
+def delete(
+        language: Annotated[
+            str,
+            Argument(help='Language code whose data should be removed.'),
+        ],
+        force: Annotated[
+            bool | None,
+            Option(
+                '--force',
+                '-f',
+                help='Skip the confirmation prompt and delete immediately.',
+            ),
+        ] = False,
+) -> None:
+    """Delete all stored data for `language`."""
+    path = get_path(language)
+    if not path.exists():
+        typer_raise(f'Language "{language}" is not initialized.')
+    if not force and not input(
+            f'Are you sure you want to delete all data for "{language}"? (y/N): ',
+    ).lower().startswith('y'):
+        typer_raise('Operation canceled.')
+    path.unlink(missing_ok=True)
+    print(f'Deleted all data for "{language}".')

preling-1.0.0/src/preling/app/commands/init.py ADDED Viewed

@@ -0,0 +1,123 @@
+from __future__ import annotations
+from collections import Counter
+from pathlib import Path
+from typing import Annotated, Generator, TYPE_CHECKING
+if TYPE_CHECKING:
+    from spacy.language import Language
+from tqdm import tqdm
+from typer import Argument
+from preling.app.app import app
+from preling.db import get_session, Session
+from preling.db.models import Sentence, SentenceWord, Word
+from preling.utils.typer import typer_raise
+__all__ = [
+    'init',
+]
+def get_nlp(language: str) -> Language:
+    """Get a spaCy language model for the specified language."""
+    import spacy
+    try:
+        return spacy.blank(language)
+    except ImportError:
+        typer_raise(f'Language "{language}" is not supported by spaCy.')
+def get_sentences(corpus: Path) -> Generator[str, None, None]:
+    """Yield sentences from the corpus file."""
+    with corpus.open('r', encoding='utf-8') as file:
+        for line in file:
+            if sentence := line.strip():
+                yield sentence
+def extract_words(nlp: Language, sentence: str) -> list[str]:
+    """Extract words from a sentence using spaCy."""
+    return [token.lower_ for token in nlp(sentence) if token.is_alpha]
+def process_corpus(language: str, corpus: Path) -> tuple[dict[str, list[str]], Counter[str]]:
+    """Process the corpus and return words by sentence and word frequencies."""
+    nlp = get_nlp(language)
+    words_by_sentence: dict[str, list[str]] = {}
+    word_frequencies: Counter[str] = Counter()
+    for sentence in tqdm(get_sentences(corpus), desc=f'Processing sentences'):
+        if sentence not in words_by_sentence and (words := extract_words(nlp, sentence)):
+            words_by_sentence[sentence] = words
+            word_frequencies.update(words)
+    return words_by_sentence, word_frequencies
+def add_words(session: Session, frequencies: Counter[str]) -> dict[str, int]:
+    """Add words to the database and return a mapping of words to their IDs.
+    One `Word` object is added per entry of `frequencies`, in `most_common()`
+    order (highest count first), with `streak_start` and `due` set to None.
+    The session is flushed but not committed inside this function.
+    """
+    ids_by_word: dict[str, int] = {}
+    for word, occurrences in tqdm(frequencies.most_common(), desc='Adding words'):
+        word_obj = Word(
+            word=word,
+            occurrences=occurrences,
+            streak_start=None,
+            due=None,
+        )
+        session.add(word_obj)
+        # Flush before reading `word_obj.id` on the next line.
+        # NOTE(review): presumably `id` is a database-generated primary key that is
+        # only populated on flush — the model definition is not visible here.
+        session.flush()
+        ids_by_word[word] = word_obj.id
+    return ids_by_word
+def add_sentences(session: Session, words_by_sentence: dict[str, list[str]], ids_by_word: dict[str, int]) -> None:
+    """Add sentences to the database.
+    For every sentence a `Sentence` object is added with both attempt counters
+    at zero, followed by one `SentenceWord` per word that records the word's
+    position in the sentence and its ID taken from `ids_by_word`. Every word in
+    `words_by_sentence` must be a key of `ids_by_word` (otherwise KeyError).
+    The session is flushed but not committed inside this function.
+    """
+    for sentence_text, words in tqdm(words_by_sentence.items(), desc='Adding sentences'):
+        sentence_obj = Sentence(
+            sentence=sentence_text,
+            correct_attempts=0,
+            incorrect_attempts=0,
+        )
+        session.add(sentence_obj)
+        # Flush before `sentence_obj.id` is used for the link rows below.
+        # NOTE(review): presumably `id` is database-generated — confirm in the models.
+        session.flush()
+        for word_index, word in enumerate(words):
+            session.add(SentenceWord(
+                sentence_id=sentence_obj.id,
+                word_index=word_index,
+                word_id=ids_by_word[word],
+            ))
+        # Push this sentence's link rows before moving on to the next sentence.
+        session.flush()
+@app.command()
+def init(
+        language: Annotated[
+            str,
+            Argument(help='Language code supported by spaCy (e.g., "en", "fr", "uk").'),
+        ],
+        corpus: Annotated[
+            Path,
+            Argument(
+                dir_okay=False,
+                exists=True,
+                readable=True,
+                resolve_path=True,
+                help='Plain‑text file containing one sentence per line.',
+            ),
+        ],
+) -> None:
+    """Initialise PreLing for a new language."""
+    with get_session(language) as session:
+        if session.query(Sentence).first():
+            typer_raise(f'PreLing is already initialized for language "{language}".')
+        words_by_sentence, word_frequencies = process_corpus(language, corpus)
+        if not word_frequencies:
+            typer_raise(f'No valid sentences found in the corpus.')
+        ids_by_word = add_words(session, word_frequencies)
+        add_sentences(session, words_by_sentence, ids_by_word)
+        print('Committing changes to the database...')
+        session.commit()
+    print(f'Initialized PreLing for language "{language}" '
+          f'with {len(words_by_sentence)} unique sentences '
+          f'and {len(word_frequencies)} unique words.')

preling-1.0.0/src/preling/app/commands/path.py ADDED Viewed

@@ -0,0 +1,26 @@
+from typing import Annotated
+from typer import Argument
+from preling.app.app import app
+from preling.db import get_path
+from preling.utils.typer import typer_raise
+__all__ = [
+    'path',
+]
+@app.command()
+def path(
+        language: Annotated[
+            str,
+            Argument(help='Language code whose data file should be printed.'),
+        ],
+) -> None:
+    """Print the absolute path to PreLing’s data file for `language`."""
+    language_path = get_path(language)
+    if language_path.exists():
+        print(language_path.absolute())
+    else:
+        typer_raise(f'Language "{language}" is not initialized.')

preling-1.0.0/src/preling/app/commands/stats.py ADDED Viewed

@@ -0,0 +1,123 @@
+from __future__ import annotations
+from typing import Annotated
+from dataclasses import dataclass, field
+from rich.console import Console
+from rich.text import Text
+from tqdm import tqdm
+from typer import Argument
+from preling.app.app import app
+from preling.db import get_session, Session
+from preling.db.models import Sentence, Word
+from preling.utils.time import get_timestamp
+__all__ = [
+    'stats',
+]
+@dataclass
+class WordStatsItem:
+    count: int = 0
+    occurrences: int = 0
+    def add_word(self, word: Word) -> None:
+        """Add a word to the stats item."""
+        self.count += 1
+        self.occurrences += word.occurrences
+    def format(self, total: int) -> Text:
+        """Format the word stats item for display."""
+        return Text(f'{self.count} ') + Text(f'(coverage: {self.occurrences / max(total, 1):.1%})', style='dim')
+@dataclass
+class WordStats:
+    """Word tallies for one language, split into three groups."""
+    # Words whose `due` timestamp is set and still in the future (printed as "In retention").
+    active: WordStatsItem = field(default_factory=WordStatsItem)
+    # Words whose `due` timestamp is set at all (printed as "Seen words").
+    seen: WordStatsItem = field(default_factory=WordStatsItem)
+    # Every word in the database (printed as "Total words").
+    total: WordStatsItem = field(default_factory=WordStatsItem)
+@dataclass
+class SentenceStats:
+    """Sentence counters for one language."""
+    correct: int = 0  # sentences with at least one correct attempt
+    seen: int = 0  # sentences with at least one attempt, correct or incorrect
+    total_sentences: int = 0  # all sentences in the database
+    total_attempts: int = 0  # correct plus incorrect attempts, summed over all sentences
+def compute_word_stats(session: Session) -> WordStats:
+    """Compute word statistics for the current language."""
+    now = get_timestamp()
+    word_stats = WordStats()
+    word: Word
+    for word in tqdm(session.query(Word).all(), desc='Computing word statistics', leave=False):
+        word_stats.total.add_word(word)
+        if word.due is not None:
+            word_stats.seen.add_word(word)
+            if word.due > now:
+                word_stats.active.add_word(word)
+    return word_stats
+def compute_sentence_stats(session: Session) -> SentenceStats:
+    """Compute sentence statistics for the current language."""
+    sentence_stats = SentenceStats()
+    for sentence in tqdm(session.query(Sentence).all(), desc='Computing sentence statistics', leave=False):
+        if sentence.correct_attempts:
+            sentence_stats.correct += 1
+        if sentence.incorrect_attempts or sentence.correct_attempts:
+            sentence_stats.seen += 1
+        sentence_stats.total_sentences += 1
+        sentence_stats.total_attempts += sentence.correct_attempts + sentence.incorrect_attempts
+    return sentence_stats
+def format_section_title(title: str) -> Text:
+    """Format a section title for display."""
+    return Text(title, style='bold underline')
+def format_stats_label(title: str) -> Text:
+    """Format a stats title for display."""
+    return Text(f'{title}:', style='bold')
+def print_word_stats(console: Console, word_stats: WordStats) -> None:
+    """Print word statistics to the console."""
+    total_occurrences = word_stats.total.occurrences
+    console.print(format_section_title('Word Statistics'))
+    console.print(format_stats_label('In retention'), word_stats.active.format(total_occurrences))
+    console.print(format_stats_label('Seen words'), word_stats.seen.format(total_occurrences))
+    console.print(format_stats_label('Total words'), word_stats.total.format(total_occurrences))
+def print_sentence_stats(console: Console, sentence_stats: SentenceStats) -> None:
+    """Print sentence statistics to the console."""
+    console.print(format_section_title('Sentence Statistics'))
+    console.print(format_stats_label('Correct at least once'), sentence_stats.correct)
+    console.print(
+        format_stats_label('Seen sentences'),
+        sentence_stats.seen,
+        Text(f'(out of {sentence_stats.total_sentences})', style='dim'),
+    )
+    console.print(format_stats_label('Total attempts'), sentence_stats.total_attempts)
+@app.command()
+def stats(
+        language: Annotated[
+            str,
+            Argument(help='Language code to show study statistics for.'),
+        ],
+) -> None:
+    """Display study statistics for the given language."""
+    with get_session(language) as session:
+        word_stats = compute_word_stats(session)
+        sentence_stats = compute_sentence_stats(session)
+    console = Console(highlight=False)
+    print_word_stats(console, word_stats)
+    console.print()
+    print_sentence_stats(console, sentence_stats)