crossrs 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. crossrs-0.1.0/LICENSE +21 -0
  2. crossrs-0.1.0/PKG-INFO +90 -0
  3. crossrs-0.1.0/README.md +66 -0
  4. crossrs-0.1.0/pyproject.toml +6 -0
  5. crossrs-0.1.0/setup.cfg +42 -0
  6. crossrs-0.1.0/src/crossrs/__init__.py +1 -0
  7. crossrs-0.1.0/src/crossrs/app/__init__.py +6 -0
  8. crossrs-0.1.0/src/crossrs/app/app.py +7 -0
  9. crossrs-0.1.0/src/crossrs/app/commands/__init__.py +13 -0
  10. crossrs-0.1.0/src/crossrs/app/commands/delete.py +40 -0
  11. crossrs-0.1.0/src/crossrs/app/commands/init.py +118 -0
  12. crossrs-0.1.0/src/crossrs/app/commands/path.py +26 -0
  13. crossrs-0.1.0/src/crossrs/app/commands/stats.py +138 -0
  14. crossrs-0.1.0/src/crossrs/app/commands/study/__init__.py +300 -0
  15. crossrs-0.1.0/src/crossrs/app/commands/study/chooser.py +165 -0
  16. crossrs-0.1.0/src/crossrs/app/commands/study/evaluator.py +162 -0
  17. crossrs-0.1.0/src/crossrs/app/commands/study/explainer.py +43 -0
  18. crossrs-0.1.0/src/crossrs/app/commands/study/interaction.py +80 -0
  19. crossrs-0.1.0/src/crossrs/app/commands/study/updater.py +103 -0
  20. crossrs-0.1.0/src/crossrs/crossrs.py +4 -0
  21. crossrs-0.1.0/src/crossrs/db/__init__.py +51 -0
  22. crossrs-0.1.0/src/crossrs/db/base.py +9 -0
  23. crossrs-0.1.0/src/crossrs/db/models.py +113 -0
  24. crossrs-0.1.0/src/crossrs/diff/__init__.py +40 -0
  25. crossrs-0.1.0/src/crossrs/diff/tokenizer.py +41 -0
  26. crossrs-0.1.0/src/crossrs/utils/__init__.py +1 -0
  27. crossrs-0.1.0/src/crossrs/utils/console.py +12 -0
  28. crossrs-0.1.0/src/crossrs/utils/paths.py +31 -0
  29. crossrs-0.1.0/src/crossrs/utils/strings.py +9 -0
  30. crossrs-0.1.0/src/crossrs/utils/time.py +9 -0
  31. crossrs-0.1.0/src/crossrs/utils/typer.py +13 -0
  32. crossrs-0.1.0/src/crossrs.egg-info/PKG-INFO +90 -0
  33. crossrs-0.1.0/src/crossrs.egg-info/SOURCES.txt +37 -0
  34. crossrs-0.1.0/src/crossrs.egg-info/dependency_links.txt +1 -0
  35. crossrs-0.1.0/src/crossrs.egg-info/entry_points.txt +2 -0
  36. crossrs-0.1.0/src/crossrs.egg-info/requires.txt +8 -0
  37. crossrs-0.1.0/src/crossrs.egg-info/top_level.txt +1 -0
  38. crossrs-0.1.0/tests/test_crossrs.py +411 -0
crossrs-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Danylo Mysak
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
crossrs-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,90 @@
1
+ Metadata-Version: 2.4
2
+ Name: crossrs
3
+ Version: 0.1.0
4
+ Summary: Command-line tool for learning foreign languages through reverse translation of word-based sentences
5
+ Home-page: https://github.com/danmysak/crossrs
6
+ Author: Danylo Mysak
7
+ Author-email: danmysak@gmail.com
8
+ Project-URL: Bug Tracker, https://github.com/danmysak/crossrs/issues
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Requires-Python: >=3.13
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE
15
+ Requires-Dist: openai
16
+ Requires-Dist: prompt_toolkit
17
+ Requires-Dist: pydantic
18
+ Requires-Dist: regex
19
+ Requires-Dist: rich
20
+ Requires-Dist: SQLAlchemy
21
+ Requires-Dist: tqdm
22
+ Requires-Dist: typer
23
+ Dynamic: license-file
24
+
25
+ # CrossRS
26
+
27
+ CrossRS is a command-line tool for improving language **production** skills through reverse translation exercises. Given a corpus in your target language, CrossRS translates sentences into a source language you already know and asks you to translate them back, reinforcing vocabulary and grammar through word-based spaced repetition. Because CrossRS uses GPT under the hood, you must have a paid [OpenAI account](https://platform.openai.com/) and an [API key](https://platform.openai.com/api-keys) to run it.
28
+
29
+ ## How It Works
30
+
31
+ CrossRS focuses on **words** sorted by their frequency in the corpus. You learn the most common ones first. Each study round:
32
+
33
+ 1. CrossRS picks a sentence containing the next word to learn.
34
+ 2. The sentence is translated into your source language and shown to you.
35
+ 3. You translate it back into the target language.
36
+ 4. CrossRS evaluates your translation and provides feedback — either a ✅ confirmation or a ❌ with a highlighted diff showing the minimal corrections needed.
37
+
38
+ Sentences you translate correctly on the first try are scheduled for a single review in **29 days 20 hours**. Otherwise, they enter a spaced-repetition queue with reviews at **20 hours**, **6 days 20 hours**, and **29 days 20 hours** before being marked as learned.
39
+
40
+ ## Installation
41
+
42
+ Install [Python](https://www.python.org/downloads/) **3.13 or later** and [pipx](https://pipx.pypa.io/stable/installation/), then run:
43
+
44
+ ```bash
45
+ pipx install crossrs # install
46
+ pipx upgrade crossrs # upgrade
47
+ pipx uninstall crossrs # uninstall
48
+ ```
49
+
50
+ ## Initialize a New Language
51
+
52
+ Prepare a plain-text file that contains **one sentence per line** in the language you want to learn. For example, you can download a monolingual corpus from [OPUS](https://opus.nlpl.eu/). Then run:
53
+
54
+ ```bash
55
+ crossrs init <target-lang> <corpus>
56
+ ```
57
+
58
+ `<target-lang>` is a language code (e.g., `de`, `fr`, `uk`), and `<corpus>` is the path to the corpus file.
59
+
60
+ ## Study a Language
61
+
62
+ ```bash
63
+ crossrs study <target-lang> <source-lang> [--threshold T] [--model <GPT_MODEL>] [--api-key <OPENAI_KEY>]
64
+ ```
65
+
66
+ * **`<target-lang>`** — the language code you initialized earlier.
67
+ * **`<source-lang>`** — the language you want sentences translated into (e.g., `en`).
68
+ * **`--threshold` / `-t`** — the learnedness threshold for words (default: 3). A word is considered fully learned once it has appeared in this many learned sentences.
69
+ * **`--model`** — the GPT model to use for translation and evaluation.
70
+ * **`--api-key`** — your OpenAI API key.
71
+
72
+ Instead of passing `--model` and `--api-key` each time, you can set the environment variables `CROSSRS_MODEL` and `CROSSRS_API_KEY`.
73
+
74
+ ## View Your Progress
75
+
76
+ ```bash
77
+ crossrs stats <target-lang> [--threshold T]
78
+ ```
79
+
80
+ Displays:
81
+ - **Sentences**: learned + in queue / total
82
+ - **Words**: learned / total, with word-level coverage
83
+ - **Total rounds**: the number of translation attempts so far
84
+
85
+ ## Other Commands
86
+
87
+ ```bash
88
+ crossrs path <target-lang> # show the path to the language data file
89
+ crossrs delete <target-lang> [--force] # delete the language data file; use --force to skip the confirmation prompt
90
+ ```
@@ -0,0 +1,66 @@
1
+ # CrossRS
2
+
3
+ CrossRS is a command-line tool for improving language **production** skills through reverse translation exercises. Given a corpus in your target language, CrossRS translates sentences into a source language you already know and asks you to translate them back, reinforcing vocabulary and grammar through word-based spaced repetition. Because CrossRS uses GPT under the hood, you must have a paid [OpenAI account](https://platform.openai.com/) and an [API key](https://platform.openai.com/api-keys) to run it.
4
+
5
+ ## How It Works
6
+
7
+ CrossRS focuses on **words** sorted by their frequency in the corpus. You learn the most common ones first. Each study round:
8
+
9
+ 1. CrossRS picks a sentence containing the next word to learn.
10
+ 2. The sentence is translated into your source language and shown to you.
11
+ 3. You translate it back into the target language.
12
+ 4. CrossRS evaluates your translation and provides feedback — either a ✅ confirmation or a ❌ with a highlighted diff showing the minimal corrections needed.
13
+
14
+ Sentences you translate correctly on the first try are scheduled for a single review in **29 days 20 hours**. Otherwise, they enter a spaced-repetition queue with reviews at **20 hours**, **6 days 20 hours**, and **29 days 20 hours** before being marked as learned.
15
+
16
+ ## Installation
17
+
18
+ Install [Python](https://www.python.org/downloads/) **3.13 or later** and [pipx](https://pipx.pypa.io/stable/installation/), then run:
19
+
20
+ ```bash
21
+ pipx install crossrs # install
22
+ pipx upgrade crossrs # upgrade
23
+ pipx uninstall crossrs # uninstall
24
+ ```
25
+
26
+ ## Initialize a New Language
27
+
28
+ Prepare a plain-text file that contains **one sentence per line** in the language you want to learn. For example, you can download a monolingual corpus from [OPUS](https://opus.nlpl.eu/). Then run:
29
+
30
+ ```bash
31
+ crossrs init <target-lang> <corpus>
32
+ ```
33
+
34
+ `<target-lang>` is a language code (e.g., `de`, `fr`, `uk`), and `<corpus>` is the path to the corpus file.
35
+
36
+ ## Study a Language
37
+
38
+ ```bash
39
+ crossrs study <target-lang> <source-lang> [--threshold T] [--model <GPT_MODEL>] [--api-key <OPENAI_KEY>]
40
+ ```
41
+
42
+ * **`<target-lang>`** — the language code you initialized earlier.
43
+ * **`<source-lang>`** — the language you want sentences translated into (e.g., `en`).
44
+ * **`--threshold` / `-t`** — the learnedness threshold for words (default: 3). A word is considered fully learned once it has appeared in this many learned sentences.
45
+ * **`--model`** — the GPT model to use for translation and evaluation.
46
+ * **`--api-key`** — your OpenAI API key.
47
+
48
+ Instead of passing `--model` and `--api-key` each time, you can set the environment variables `CROSSRS_MODEL` and `CROSSRS_API_KEY`.
49
+
50
+ ## View Your Progress
51
+
52
+ ```bash
53
+ crossrs stats <target-lang> [--threshold T]
54
+ ```
55
+
56
+ Displays:
57
+ - **Sentences**: learned + in queue / total
58
+ - **Words**: learned / total, with word-level coverage
59
+ - **Total rounds**: the number of translation attempts so far
60
+
61
+ ## Other Commands
62
+
63
+ ```bash
64
+ crossrs path <target-lang> # show the path to the language data file
65
+ crossrs delete <target-lang> [--force] # delete the language data file; use --force to skip the confirmation prompt
66
+ ```
@@ -0,0 +1,6 @@
1
+ [build-system]
2
+ requires = [
3
+ "setuptools>=42",
4
+ "wheel"
5
+ ]
6
+ build-backend = "setuptools.build_meta"
@@ -0,0 +1,42 @@
1
+ [metadata]
2
+ name = crossrs
3
+ version = 0.1.0
4
+ author = Danylo Mysak
5
+ author_email = danmysak@gmail.com
6
+ description = Command-line tool for learning foreign languages through reverse translation of word-based sentences
7
+ long_description = file: README.md
8
+ long_description_content_type = text/markdown
9
+ url = https://github.com/danmysak/crossrs
10
+ project_urls =
11
+ Bug Tracker = https://github.com/danmysak/crossrs/issues
12
+ classifiers =
13
+ Programming Language :: Python :: 3
14
+ License :: OSI Approved :: MIT License
15
+ Operating System :: OS Independent
16
+
17
+ [options]
18
+ package_dir =
19
+ =src
20
+ packages = find:
21
+ python_requires = >=3.13
22
+ install_requires =
23
+ openai
24
+ prompt_toolkit
25
+ pydantic
26
+ regex
27
+ rich
28
+ SQLAlchemy
29
+ tqdm
30
+ typer
31
+
32
+ [options.entry_points]
33
+ console_scripts =
34
+ crossrs = crossrs.crossrs:app
35
+
36
+ [options.packages.find]
37
+ where = src
38
+
39
+ [egg_info]
40
+ tag_build =
41
+ tag_date = 0
42
+
@@ -0,0 +1 @@
1
+
@@ -0,0 +1,6 @@
1
+ from . import commands # Register commands with Typer
2
+ from .app import app
3
+
4
+ __all__ = [
5
+ 'app',
6
+ ]
@@ -0,0 +1,7 @@
import typer

__all__ = [
    'app',
]

# Single shared Typer application instance for the whole CLI.
# Pretty-exception rendering is disabled so failures surface as plain tracebacks.
app = typer.Typer(pretty_exceptions_enable=False)
@@ -0,0 +1,13 @@
1
+ from .delete import delete
2
+ from .init import init
3
+ from .path import path
4
+ from .stats import stats
5
+ from .study import study
6
+
7
+ __all__ = [
8
+ 'delete',
9
+ 'init',
10
+ 'path',
11
+ 'stats',
12
+ 'study',
13
+ ]
@@ -0,0 +1,40 @@
1
+ from typing import Annotated
2
+
3
+ from typer import Argument, Option
4
+
5
+ from crossrs.app.app import app
6
+ from crossrs.db import get_path
7
+ from crossrs.utils.typer import typer_raise
8
+
9
+ __all__ = [
10
+ 'delete',
11
+ ]
12
+
13
+
14
@app.command()
def delete(
    language: Annotated[
        str,
        Argument(help='Target language code whose data should be removed.'),
    ],
    force: Annotated[
        bool,  # fixed: was `bool | None`, but this is a plain flag with a False default
        Option(
            '--force',
            '-f',
            help='Skip the confirmation prompt and delete immediately.',
        ),
    ] = False,
    ) -> None:
    """Delete all stored data for `language`.

    Exits with an error if the language was never initialized; prompts for
    confirmation unless --force/-f is given.
    """
    path = get_path(language)
    if not path.exists():
        typer_raise(f'Language "{language}" is not initialized.')

    # Only the literal answer starting with "y"/"Y" confirms deletion.
    if not force and not input(
        f'Are you sure you want to delete all data for "{language}"? (y/N): ',
    ).lower().startswith('y'):
        typer_raise('Operation canceled.')

    path.unlink(missing_ok=True)
    print(f'Deleted all data for "{language}".')
@@ -0,0 +1,118 @@
1
+ from __future__ import annotations
2
+ from collections import Counter
3
+ from pathlib import Path
4
+ from typing import Annotated, Generator
5
+
6
+ from sqlalchemy import text
7
+ from tqdm import tqdm
8
+ from typer import Argument
9
+
10
+ from crossrs.app.app import app
11
+ from crossrs.db import get_session, Session
12
+ from crossrs.db.models import Metadata, Word, Sentence, SentenceWord
13
+ from crossrs.diff.tokenizer import tokenize
14
+ from crossrs.utils.typer import typer_raise
15
+
16
+ __all__ = [
17
+ 'init',
18
+ ]
19
+
20
+
21
def get_sentences(corpus: Path) -> Generator[str, None, None]:
    """Yield each non-empty line of the corpus file, stripped of whitespace."""
    with corpus.open('r', encoding='utf-8') as file:
        for raw_line in file:
            stripped = raw_line.strip()
            if stripped:
                yield stripped
27
+
28
+
29
def extract_tokens(sentence: str) -> list[str]:
    """Extract normalized tokens from a sentence."""
    # Each token produced by the project tokenizer carries a `normalized` form.
    return [tok.normalized for tok in tokenize(sentence)]
32
+
33
+
34
def extract_words(tokens: list[str]) -> list[str]:
    """Return the words derived from `tokens` (currently a fresh copy of the list)."""
    return tokens.copy()
37
+
38
+
39
def process_corpus(corpus: Path) -> tuple[dict[str, set[str]], Counter[str]]:
    """Process the corpus and return unique words by sentence and word frequencies."""
    words_by_sentence: dict[str, set[str]] = {}
    word_frequencies: Counter[str] = Counter()

    for sentence in tqdm(get_sentences(corpus), desc='Processing sentences'):
        if sentence in words_by_sentence:
            continue  # duplicate sentence: counted once only
        tokens = extract_tokens(sentence)
        if not tokens:
            continue  # nothing tokenizable on this line; skip entirely
        words = extract_words(tokens)
        words_by_sentence[sentence] = set(words)
        word_frequencies.update(words)

    return words_by_sentence, word_frequencies
53
+
54
+
55
def add_words(session: Session, frequencies: Counter[str]) -> dict[str, int]:
    """Add words to the database sorted by frequency and return a word -> ID mapping.

    All Word rows are staged first and flushed once, instead of flushing per
    row as before: a single flush still assigns autoincrement IDs in insertion
    (i.e. frequency) order while avoiding one database round trip per word.
    """
    word_objs: dict[str, Word] = {}
    for word, occurrences in tqdm(frequencies.most_common(), desc='Adding words'):
        word_objs[word] = Word(
            word=word,
            occurrences=occurrences,
            learnedness=0,
        )
    session.add_all(word_objs.values())
    session.flush()  # one flush populates every Word.id
    return {word: word_obj.id for word, word_obj in word_objs.items()}
68
+
69
+
70
def add_sentences(session: Session, words_by_sentence: dict[str, set[str]],
                  ids_by_word: dict[str, int]) -> None:
    """Add sentences to the database with their word associations.

    Sentences are staged and flushed in one batch (assigning all IDs at once),
    then the sentence-word association rows are staged and flushed in a second
    batch — replacing the original per-sentence flushes, which cost two
    database round trips per sentence.
    """
    sentence_objs: dict[str, Sentence] = {}
    for sentence_text in tqdm(words_by_sentence, desc='Adding sentences'):
        sentence_obj = Sentence(sentence=sentence_text)
        session.add(sentence_obj)
        sentence_objs[sentence_text] = sentence_obj
    session.flush()  # one flush populates every Sentence.id

    for sentence_text, words in words_by_sentence.items():
        sentence_id = sentence_objs[sentence_text].id
        for word in words:
            session.add(SentenceWord(
                sentence_id=sentence_id,
                word_id=ids_by_word[word],
            ))
    session.flush()
84
+
85
+ @app.command()
86
+ def init(
87
+ language: Annotated[
88
+ str,
89
+ Argument(help='Target language code (e.g., "de", "fr", "uk").'),
90
+ ],
91
+ corpus: Annotated[
92
+ Path,
93
+ Argument(
94
+ dir_okay=False,
95
+ exists=True,
96
+ readable=True,
97
+ resolve_path=True,
98
+ help='Plain-text file containing one sentence per line.',
99
+ ),
100
+ ],
101
+ ) -> None:
102
+ """Initialize CrossRS for a new target language."""
103
+ with get_session(language) as session:
104
+ if session.query(Sentence).limit(1).first():
105
+ typer_raise(f'CrossRS is already initialized for language "{language}".')
106
+ words_by_sentence, word_frequencies = process_corpus(corpus)
107
+ if not word_frequencies:
108
+ typer_raise('No valid sentences found in the corpus.')
109
+ ids_by_word = add_words(session, word_frequencies)
110
+ add_sentences(session, words_by_sentence, ids_by_word)
111
+ session.add(Metadata(id=1, total_rounds=0))
112
+ print('Committing changes to the database...')
113
+ session.commit()
114
+ print('Optimizing the database...')
115
+ session.execute(text('vacuum'))
116
+ print(f'Initialized CrossRS for language "{language}" '
117
+ f'with {len(words_by_sentence)} unique sentences '
118
+ f'and {len(word_frequencies)} unique words.')
@@ -0,0 +1,26 @@
1
+ from typing import Annotated
2
+
3
+ from typer import Argument
4
+
5
+ from crossrs.app.app import app
6
+ from crossrs.db import get_path
7
+ from crossrs.utils.typer import typer_raise
8
+
9
+ __all__ = [
10
+ 'path',
11
+ ]
12
+
13
+
14
@app.command()
def path(
    language: Annotated[
        str,
        Argument(help='Target language code whose data file should be printed.'),
    ],
    ) -> None:
    """Print the absolute path to CrossRS's data file for `language`."""
    language_path = get_path(language)
    # Guard clause: typer_raise aborts the command for uninitialized languages.
    if not language_path.exists():
        typer_raise(f'Language "{language}" is not initialized.')
    print(language_path.absolute())
@@ -0,0 +1,138 @@
1
+ from __future__ import annotations
2
+ from typing import Annotated
3
+
4
+ from dataclasses import dataclass
5
+ from rich.console import Console
6
+ from rich.text import Text
7
+ from sqlalchemy import func, case
8
+ from typer import Argument, Option
9
+
10
+ from crossrs.app.app import app
11
+ from crossrs.db import get_session, Session
12
+ from crossrs.db.models import Metadata, Word, Sentence
13
+
14
+ __all__ = [
15
+ 'stats',
16
+ ]
17
+
18
+ DEFAULT_THRESHOLD = 3
19
+
20
+
21
@dataclass
class WordStatsItem:
    """Aggregate word-level counts for a language."""
    learned: int = 0              # words at or above the learnedness threshold
    total: int = 0                # all distinct words
    learned_occurrences: int = 0  # corpus occurrences contributed by learned words
    total_occurrences: int = 0    # corpus occurrences of all words
27
+
28
+
29
@dataclass
class SentenceStatsData:
    """Aggregate sentence-level counts and round totals for a language."""
    learned: int = 0         # sentences counted as learned by compute_sentence_stats
    in_queue: int = 0        # sentences currently in the review queue
    total: int = 0           # all sentences
    total_rounds: int = 0    # translation rounds recorded in Metadata
    targeted_words: int = 0  # distinct unlearned words targeted by queued sentences
36
+
37
+
38
def compute_word_stats(session: Session, threshold: int) -> WordStatsItem:
    """Compute word statistics using SQL aggregation."""
    # Unpack the single aggregate row into named values instead of indexing.
    total, total_occurrences, learned, learned_occurrences = session.query(
        func.count(Word.id),
        func.sum(Word.occurrences),
        func.sum(case((Word.learnedness >= threshold, 1), else_=0)),
        func.sum(case((Word.learnedness >= threshold, Word.occurrences), else_=0)),
    ).one()

    # SUM over an empty table yields NULL/None, hence the `or 0` coercions.
    return WordStatsItem(
        learned=int(learned or 0),
        total=int(total or 0),
        learned_occurrences=int(learned_occurrences or 0),
        total_occurrences=int(total_occurrences or 0),
    )
53
+
54
+
55
def compute_sentence_stats(session: Session, threshold: int) -> SentenceStatsData:
    """Compute sentence statistics using SQL aggregation."""
    # Single aggregate row: total count plus per-status tallies
    # (status == 2 is reported as learned, status == 1 as in-queue).
    total, learned, in_queue = session.query(
        func.count(Sentence.id),
        func.sum(case((Sentence.status == 2, 1), else_=0)),
        func.sum(case((Sentence.status == 1, 1), else_=0)),
    ).one()

    meta = session.get(Metadata, 1)
    total_rounds = meta.total_rounds if meta else 0

    # Distinct target words of queued sentences whose word is still unlearned.
    targeted_words = session.query(
        func.count(func.distinct(Sentence.target_word_id)),
    ).filter(
        Sentence.status == 1,
        Sentence.target_word_id.is_not(None),
    ).join(Word, Sentence.target_word_id == Word.id).filter(
        Word.learnedness < threshold,
    ).scalar()

    # SUM aggregates yield None on empty input, hence the `or 0` coercions.
    return SentenceStatsData(
        total=int(total),
        total_rounds=total_rounds,
        learned=int(learned or 0),
        in_queue=int(in_queue or 0),
        targeted_words=int(targeted_words or 0),
    )
82
+
83
+
84
def format_section_title(title: str) -> Text:
    """Return `title` rendered as a bold, underlined section heading."""
    return Text(title, style='bold underline')
86
+
87
+
88
def format_stats_label(title: str) -> Text:
    """Return `title` with a trailing colon, rendered bold, for use as a stat label."""
    return Text(f'{title}:', style='bold')
90
+
91
+
92
@app.command()
def stats(
    language: Annotated[
        str,
        Argument(help='Target language code to show statistics for.'),
    ],
    threshold: Annotated[
        int,
        Option(
            '--threshold', '-t',
            help='Learnedness threshold for words to be considered fully learned.',
        ),
    ] = DEFAULT_THRESHOLD,
    ) -> None:
    """Display study statistics for the given language.

    Prints three sections: sentence counts (learned / in queue / total),
    word counts with corpus coverage, and the total number of rounds.
    """
    with get_session(language) as session:
        word_stats = compute_word_stats(session, threshold)
        sentence_stats = compute_sentence_stats(session, threshold)

    console = Console(highlight=False)

    # Sentence statistics
    console.print(format_section_title('Sentence Statistics'))
    console.print(
        format_stats_label('Sentences'),
        Text(f'{sentence_stats.learned} learned + {sentence_stats.in_queue} in queue '
             f'/ {sentence_stats.total} total'),
    )
    console.print()

    # Word statistics
    console.print(format_section_title('Word Statistics'))
    text = Text(f'{word_stats.learned} learned / {word_stats.total} total')
    if word_stats.total_occurrences > 0:
        coverage = word_stats.learned_occurrences / word_stats.total_occurrences
        text.append(' ')  # fixed: was `f' '`, an f-string with nothing to format
        text.append(Text(f'(coverage: {coverage:.1%})', style='dim'))
    console.print(format_stats_label('Words'), text)
    if sentence_stats.targeted_words > 0:
        console.print(
            format_stats_label('Targeted'),
            Text(f'{sentence_stats.targeted_words} unlearned words in queue'),
        )
    console.print()

    # Total rounds
    console.print(format_stats_label('Total rounds'), sentence_stats.total_rounds)