nl-processing 0.5.0__tar.gz → 1.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nl_processing-1.0.2/PKG-INFO +119 -0
- nl_processing-1.0.2/README.md +105 -0
- nl_processing-1.0.2/nl_processing.egg-info/PKG-INFO +119 -0
- nl_processing-1.0.2/nl_processing.egg-info/SOURCES.txt +52 -0
- {nl_processing-0.5.0 → nl_processing-1.0.2}/nl_processing.egg-info/requires.txt +2 -1
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/core/src}/nl_processing/core/models.py +19 -0
- nl_processing-1.0.2/packages/core/src/nl_processing/core/ports.py +25 -0
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database/src}/nl_processing/database/exercise_progress.py +57 -25
- nl_processing-1.0.2/packages/database/src/nl_processing/database/models.py +7 -0
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database/src}/nl_processing/database/service.py +21 -11
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database_cache/src}/nl_processing/database_cache/local_store.py +3 -7
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database_cache/src}/nl_processing/database_cache/service.py +11 -6
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database_cache/src}/nl_processing/database_cache/sync.py +4 -3
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/extract_text_from_image/src}/nl_processing/extract_text_from_image/image_encoding.py +1 -2
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/extract_text_from_image/src}/nl_processing/extract_text_from_image/prompts/generate_nl_prompt.py +1 -1
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/extract_words_from_text/src}/nl_processing/extract_words_from_text/prompts/generate_nl_prompt.py +1 -1
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/extract_words_from_text/src}/nl_processing/extract_words_from_text/service.py +1 -2
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/sampling/src}/nl_processing/sampling/service.py +3 -11
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/translate_text/src}/nl_processing/translate_text/prompts/generate_nl_ru_prompt.py +1 -1
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/translate_text/src}/nl_processing/translate_text/service.py +1 -2
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/translate_word/src}/nl_processing/translate_word/prompts/generate_nl_ru_prompt.py +1 -1
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/translate_word/src}/nl_processing/translate_word/service.py +1 -2
- nl_processing-1.0.2/pyproject.toml +67 -0
- nl_processing-0.5.0/PKG-INFO +0 -109
- nl_processing-0.5.0/README.md +0 -96
- nl_processing-0.5.0/nl_processing/core/scripts/prompt_author.py +0 -59
- nl_processing-0.5.0/nl_processing/database/models.py +0 -19
- nl_processing-0.5.0/nl_processing/translate_word/__init__.py +0 -0
- nl_processing-0.5.0/nl_processing.egg-info/PKG-INFO +0 -109
- nl_processing-0.5.0/nl_processing.egg-info/SOURCES.txt +0 -53
- nl_processing-0.5.0/pyproject.toml +0 -39
- {nl_processing-0.5.0 → nl_processing-1.0.2}/nl_processing.egg-info/dependency_links.txt +0 -0
- {nl_processing-0.5.0 → nl_processing-1.0.2}/nl_processing.egg-info/top_level.txt +0 -0
- {nl_processing-0.5.0/nl_processing → nl_processing-1.0.2/packages/core/src/nl_processing/core}/__init__.py +0 -0
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/core/src}/nl_processing/core/exceptions.py +0 -0
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/core/src}/nl_processing/core/prompts.py +0 -0
- {nl_processing-0.5.0/nl_processing/core → nl_processing-1.0.2/packages/database/src/nl_processing/database}/__init__.py +0 -0
- {nl_processing-0.5.0/nl_processing/database → nl_processing-1.0.2/packages/database/src/nl_processing/database/backend}/__init__.py +0 -0
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database/src}/nl_processing/database/backend/_neon_exercise.py +0 -0
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database/src}/nl_processing/database/backend/_queries.py +0 -0
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database/src}/nl_processing/database/backend/abstract.py +0 -0
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database/src}/nl_processing/database/backend/neon.py +0 -0
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database/src}/nl_processing/database/exceptions.py +0 -0
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database/src}/nl_processing/database/logging.py +0 -0
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database/src}/nl_processing/database/testing.py +0 -0
- {nl_processing-0.5.0/nl_processing/database/backend → nl_processing-1.0.2/packages/database_cache/src/nl_processing/database_cache}/__init__.py +0 -0
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database_cache/src}/nl_processing/database_cache/_local_store_queries.py +0 -0
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database_cache/src}/nl_processing/database_cache/exceptions.py +0 -0
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database_cache/src}/nl_processing/database_cache/logging.py +0 -0
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database_cache/src}/nl_processing/database_cache/models.py +0 -0
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/extract_text_from_image/src}/nl_processing/extract_text_from_image/__init__.py +0 -0
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/extract_text_from_image/src}/nl_processing/extract_text_from_image/benchmark.py +0 -0
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/extract_text_from_image/src}/nl_processing/extract_text_from_image/prompts/nl.json +0 -0
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/extract_text_from_image/src}/nl_processing/extract_text_from_image/service.py +2 -2
- {nl_processing-0.5.0/nl_processing/database_cache → nl_processing-1.0.2/packages/extract_words_from_text/src/nl_processing/extract_words_from_text}/__init__.py +0 -0
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/extract_words_from_text/src}/nl_processing/extract_words_from_text/prompts/nl.json +0 -0
- {nl_processing-0.5.0/nl_processing/extract_words_from_text → nl_processing-1.0.2/packages/sampling/src/nl_processing/sampling}/__init__.py +0 -0
- {nl_processing-0.5.0/nl_processing/sampling → nl_processing-1.0.2/packages/translate_text/src/nl_processing/translate_text}/__init__.py +0 -0
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/translate_text/src}/nl_processing/translate_text/prompts/nl_ru.json +0 -0
- {nl_processing-0.5.0/nl_processing/translate_text → nl_processing-1.0.2/packages/translate_word/src/nl_processing/translate_word}/__init__.py +0 -0
- {nl_processing-0.5.0 → nl_processing-1.0.2/packages/translate_word/src}/nl_processing/translate_word/prompts/nl_ru.json +0 -0
- {nl_processing-0.5.0 → nl_processing-1.0.2}/setup.cfg +0 -0
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: nl_processing
|
|
3
|
+
Version: 1.0.2
|
|
4
|
+
Summary: Aggregate build for the nl_processing multi-package repository
|
|
5
|
+
Requires-Python: >=3.12
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: pydantic<3,>=2.0
|
|
8
|
+
Requires-Dist: langchain-core<1,>=0.3
|
|
9
|
+
Requires-Dist: langchain-openai<1,>=0.3
|
|
10
|
+
Requires-Dist: numpy<3,>=2.0
|
|
11
|
+
Requires-Dist: opencv-python<5,>=4.10
|
|
12
|
+
Requires-Dist: asyncpg<1,>=0.30
|
|
13
|
+
Requires-Dist: aiosqlite<1,>=0.20
|
|
14
|
+
|
|
15
|
+
# nl_processing
|
|
16
|
+
|
|
17
|
+
[PyPI](https://pypi.org/project/nl_processing/)
|
|
18
|
+
|
|
19
|
+
Dutch language processing toolkit organized as a multi-package Python repository.
|
|
20
|
+
|
|
21
|
+
## Install
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
pip install nl_processing
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
The published `nl_processing` package is the aggregate build from the repo root. Day-to-day development happens inside the package folders under `packages/`.
|
|
28
|
+
|
|
29
|
+
## Repository Layout
|
|
30
|
+
|
|
31
|
+
```text
|
|
32
|
+
packages/
|
|
33
|
+
core/
|
|
34
|
+
extract_text_from_image/
|
|
35
|
+
extract_words_from_text/
|
|
36
|
+
translate_text/
|
|
37
|
+
translate_word/
|
|
38
|
+
database/
|
|
39
|
+
database_cache/
|
|
40
|
+
sampling/
|
|
41
|
+
docs/
|
|
42
|
+
pyproject.toml # aggregate build for the published nl_processing package
|
|
43
|
+
Makefile # repo-wide lint/test entrypoint
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Each package has its own:
|
|
47
|
+
|
|
48
|
+
- `pyproject.toml`
|
|
49
|
+
- `ruff.toml`
|
|
50
|
+
- `pytest.ini`
|
|
51
|
+
- `tests/`
|
|
52
|
+
- `docs/`
|
|
53
|
+
|
|
54
|
+
## Modules
|
|
55
|
+
|
|
56
|
+
| Module | Class | Description | Docs |
|
|
57
|
+
|---|---|---|---|
|
|
58
|
+
| `core` | N/A | Shared models, ports, exceptions, and prompt helpers | [docs](packages/core/docs/module-spec.md) |
|
|
59
|
+
| `extract_text_from_image` | `ImageTextExtractor` | Extract Dutch text from images via Vision API | [docs](packages/extract_text_from_image/docs/module-spec.md) |
|
|
60
|
+
| `extract_words_from_text` | `WordExtractor` | Extract and normalize words from markdown text | [docs](packages/extract_words_from_text/docs/module-spec.md) |
|
|
61
|
+
| `translate_text` | `TextTranslator` | Translate text (NL -> RU) with markdown preservation | [docs](packages/translate_text/docs/module-spec.md) |
|
|
62
|
+
| `translate_word` | `WordTranslator` | Batch-translate words (NL -> RU) | [docs](packages/translate_word/docs/module-spec.md) |
|
|
63
|
+
| `database` | `DatabaseService` | Remote source of truth and default progress/sync provider | [docs](packages/database/docs/module-spec.md) |
|
|
64
|
+
| `database_cache` | `DatabaseCacheService` | Local-first SQLite cache with injectable remote progress sync | [docs](packages/database_cache/docs/module-spec.md) |
|
|
65
|
+
| `sampling` | `WordSampler` | Weighted word sampling over any compatible scored-pair provider | [docs](packages/sampling/docs/module-spec.md) |
|
|
66
|
+
|
|
67
|
+
## Development
|
|
68
|
+
|
|
69
|
+
Work inside one package when you only touch one module:
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
cd packages/translate_word
|
|
73
|
+
uv sync --all-groups
|
|
74
|
+
uv run pytest tests/unit
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Run the repo-wide quality gate from the root:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
make check
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Useful package-local examples:
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
cd packages/core
|
|
87
|
+
uv run pytest tests/unit/core
|
|
88
|
+
|
|
89
|
+
cd packages/database
|
|
90
|
+
doppler run -- uv run pytest tests/integration/database
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## Dependency Rule
|
|
94
|
+
|
|
95
|
+
Modules are independent packages. Cross-module dependencies must be explicit in the consuming package's `pyproject.toml`.
|
|
96
|
+
|
|
97
|
+
Shared cross-module storage contracts live in `nl_processing.core.ports`. `database` and `database_cache` are concrete implementations and adapters, not the owners of those shared interfaces.
|
|
98
|
+
|
|
99
|
+
One intentional design change in this layout: `database` no longer imports `translate_word` directly. If you want automatic translation on `add_words()`, compose it explicitly:
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
from nl_processing.core.models import Language
|
|
103
|
+
from nl_processing.database.service import DatabaseService
|
|
104
|
+
from nl_processing.translate_word.service import WordTranslator
|
|
105
|
+
|
|
106
|
+
db = DatabaseService(
|
|
107
|
+
user_id="alex",
|
|
108
|
+
translator=WordTranslator(
|
|
109
|
+
source_language=Language.NL,
|
|
110
|
+
target_language=Language.RU,
|
|
111
|
+
),
|
|
112
|
+
)
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Docs
|
|
116
|
+
|
|
117
|
+
- Repository module spec: [docs/module-spec.md](docs/module-spec.md)
|
|
118
|
+
- Environment variables: [docs/ENV_VARS.md](docs/ENV_VARS.md)
|
|
119
|
+
- Release workflow: [docs/REALEASE_WORKFLOW.md](docs/REALEASE_WORKFLOW.md)
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# nl_processing
|
|
2
|
+
|
|
3
|
+
[PyPI](https://pypi.org/project/nl_processing/)
|
|
4
|
+
|
|
5
|
+
Dutch language processing toolkit organized as a multi-package Python repository.
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install nl_processing
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
The published `nl_processing` package is the aggregate build from the repo root. Day-to-day development happens inside the package folders under `packages/`.
|
|
14
|
+
|
|
15
|
+
## Repository Layout
|
|
16
|
+
|
|
17
|
+
```text
|
|
18
|
+
packages/
|
|
19
|
+
core/
|
|
20
|
+
extract_text_from_image/
|
|
21
|
+
extract_words_from_text/
|
|
22
|
+
translate_text/
|
|
23
|
+
translate_word/
|
|
24
|
+
database/
|
|
25
|
+
database_cache/
|
|
26
|
+
sampling/
|
|
27
|
+
docs/
|
|
28
|
+
pyproject.toml # aggregate build for the published nl_processing package
|
|
29
|
+
Makefile # repo-wide lint/test entrypoint
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
Each package has its own:
|
|
33
|
+
|
|
34
|
+
- `pyproject.toml`
|
|
35
|
+
- `ruff.toml`
|
|
36
|
+
- `pytest.ini`
|
|
37
|
+
- `tests/`
|
|
38
|
+
- `docs/`
|
|
39
|
+
|
|
40
|
+
## Modules
|
|
41
|
+
|
|
42
|
+
| Module | Class | Description | Docs |
|
|
43
|
+
|---|---|---|---|
|
|
44
|
+
| `core` | N/A | Shared models, ports, exceptions, and prompt helpers | [docs](packages/core/docs/module-spec.md) |
|
|
45
|
+
| `extract_text_from_image` | `ImageTextExtractor` | Extract Dutch text from images via Vision API | [docs](packages/extract_text_from_image/docs/module-spec.md) |
|
|
46
|
+
| `extract_words_from_text` | `WordExtractor` | Extract and normalize words from markdown text | [docs](packages/extract_words_from_text/docs/module-spec.md) |
|
|
47
|
+
| `translate_text` | `TextTranslator` | Translate text (NL -> RU) with markdown preservation | [docs](packages/translate_text/docs/module-spec.md) |
|
|
48
|
+
| `translate_word` | `WordTranslator` | Batch-translate words (NL -> RU) | [docs](packages/translate_word/docs/module-spec.md) |
|
|
49
|
+
| `database` | `DatabaseService` | Remote source of truth and default progress/sync provider | [docs](packages/database/docs/module-spec.md) |
|
|
50
|
+
| `database_cache` | `DatabaseCacheService` | Local-first SQLite cache with injectable remote progress sync | [docs](packages/database_cache/docs/module-spec.md) |
|
|
51
|
+
| `sampling` | `WordSampler` | Weighted word sampling over any compatible scored-pair provider | [docs](packages/sampling/docs/module-spec.md) |
|
|
52
|
+
|
|
53
|
+
## Development
|
|
54
|
+
|
|
55
|
+
Work inside one package when you only touch one module:
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
cd packages/translate_word
|
|
59
|
+
uv sync --all-groups
|
|
60
|
+
uv run pytest tests/unit
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Run the repo-wide quality gate from the root:
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
make check
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
Useful package-local examples:
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
cd packages/core
|
|
73
|
+
uv run pytest tests/unit/core
|
|
74
|
+
|
|
75
|
+
cd packages/database
|
|
76
|
+
doppler run -- uv run pytest tests/integration/database
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## Dependency Rule
|
|
80
|
+
|
|
81
|
+
Modules are independent packages. Cross-module dependencies must be explicit in the consuming package's `pyproject.toml`.
|
|
82
|
+
|
|
83
|
+
Shared cross-module storage contracts live in `nl_processing.core.ports`. `database` and `database_cache` are concrete implementations and adapters, not the owners of those shared interfaces.
|
|
84
|
+
|
|
85
|
+
One intentional design change in this layout: `database` no longer imports `translate_word` directly. If you want automatic translation on `add_words()`, compose it explicitly:
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
from nl_processing.core.models import Language
|
|
89
|
+
from nl_processing.database.service import DatabaseService
|
|
90
|
+
from nl_processing.translate_word.service import WordTranslator
|
|
91
|
+
|
|
92
|
+
db = DatabaseService(
|
|
93
|
+
user_id="alex",
|
|
94
|
+
translator=WordTranslator(
|
|
95
|
+
source_language=Language.NL,
|
|
96
|
+
target_language=Language.RU,
|
|
97
|
+
),
|
|
98
|
+
)
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## Docs
|
|
102
|
+
|
|
103
|
+
- Repository module spec: [docs/module-spec.md](docs/module-spec.md)
|
|
104
|
+
- Environment variables: [docs/ENV_VARS.md](docs/ENV_VARS.md)
|
|
105
|
+
- Release workflow: [docs/REALEASE_WORKFLOW.md](docs/REALEASE_WORKFLOW.md)
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: nl_processing
|
|
3
|
+
Version: 1.0.2
|
|
4
|
+
Summary: Aggregate build for the nl_processing multi-package repository
|
|
5
|
+
Requires-Python: >=3.12
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: pydantic<3,>=2.0
|
|
8
|
+
Requires-Dist: langchain-core<1,>=0.3
|
|
9
|
+
Requires-Dist: langchain-openai<1,>=0.3
|
|
10
|
+
Requires-Dist: numpy<3,>=2.0
|
|
11
|
+
Requires-Dist: opencv-python<5,>=4.10
|
|
12
|
+
Requires-Dist: asyncpg<1,>=0.30
|
|
13
|
+
Requires-Dist: aiosqlite<1,>=0.20
|
|
14
|
+
|
|
15
|
+
# nl_processing
|
|
16
|
+
|
|
17
|
+
[PyPI](https://pypi.org/project/nl_processing/)
|
|
18
|
+
|
|
19
|
+
Dutch language processing toolkit organized as a multi-package Python repository.
|
|
20
|
+
|
|
21
|
+
## Install
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
pip install nl_processing
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
The published `nl_processing` package is the aggregate build from the repo root. Day-to-day development happens inside the package folders under `packages/`.
|
|
28
|
+
|
|
29
|
+
## Repository Layout
|
|
30
|
+
|
|
31
|
+
```text
|
|
32
|
+
packages/
|
|
33
|
+
core/
|
|
34
|
+
extract_text_from_image/
|
|
35
|
+
extract_words_from_text/
|
|
36
|
+
translate_text/
|
|
37
|
+
translate_word/
|
|
38
|
+
database/
|
|
39
|
+
database_cache/
|
|
40
|
+
sampling/
|
|
41
|
+
docs/
|
|
42
|
+
pyproject.toml # aggregate build for the published nl_processing package
|
|
43
|
+
Makefile # repo-wide lint/test entrypoint
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Each package has its own:
|
|
47
|
+
|
|
48
|
+
- `pyproject.toml`
|
|
49
|
+
- `ruff.toml`
|
|
50
|
+
- `pytest.ini`
|
|
51
|
+
- `tests/`
|
|
52
|
+
- `docs/`
|
|
53
|
+
|
|
54
|
+
## Modules
|
|
55
|
+
|
|
56
|
+
| Module | Class | Description | Docs |
|
|
57
|
+
|---|---|---|---|
|
|
58
|
+
| `core` | N/A | Shared models, ports, exceptions, and prompt helpers | [docs](packages/core/docs/module-spec.md) |
|
|
59
|
+
| `extract_text_from_image` | `ImageTextExtractor` | Extract Dutch text from images via Vision API | [docs](packages/extract_text_from_image/docs/module-spec.md) |
|
|
60
|
+
| `extract_words_from_text` | `WordExtractor` | Extract and normalize words from markdown text | [docs](packages/extract_words_from_text/docs/module-spec.md) |
|
|
61
|
+
| `translate_text` | `TextTranslator` | Translate text (NL -> RU) with markdown preservation | [docs](packages/translate_text/docs/module-spec.md) |
|
|
62
|
+
| `translate_word` | `WordTranslator` | Batch-translate words (NL -> RU) | [docs](packages/translate_word/docs/module-spec.md) |
|
|
63
|
+
| `database` | `DatabaseService` | Remote source of truth and default progress/sync provider | [docs](packages/database/docs/module-spec.md) |
|
|
64
|
+
| `database_cache` | `DatabaseCacheService` | Local-first SQLite cache with injectable remote progress sync | [docs](packages/database_cache/docs/module-spec.md) |
|
|
65
|
+
| `sampling` | `WordSampler` | Weighted word sampling over any compatible scored-pair provider | [docs](packages/sampling/docs/module-spec.md) |
|
|
66
|
+
|
|
67
|
+
## Development
|
|
68
|
+
|
|
69
|
+
Work inside one package when you only touch one module:
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
cd packages/translate_word
|
|
73
|
+
uv sync --all-groups
|
|
74
|
+
uv run pytest tests/unit
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Run the repo-wide quality gate from the root:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
make check
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Useful package-local examples:
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
cd packages/core
|
|
87
|
+
uv run pytest tests/unit/core
|
|
88
|
+
|
|
89
|
+
cd packages/database
|
|
90
|
+
doppler run -- uv run pytest tests/integration/database
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## Dependency Rule
|
|
94
|
+
|
|
95
|
+
Modules are independent packages. Cross-module dependencies must be explicit in the consuming package's `pyproject.toml`.
|
|
96
|
+
|
|
97
|
+
Shared cross-module storage contracts live in `nl_processing.core.ports`. `database` and `database_cache` are concrete implementations and adapters, not the owners of those shared interfaces.
|
|
98
|
+
|
|
99
|
+
One intentional design change in this layout: `database` no longer imports `translate_word` directly. If you want automatic translation on `add_words()`, compose it explicitly:
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
from nl_processing.core.models import Language
|
|
103
|
+
from nl_processing.database.service import DatabaseService
|
|
104
|
+
from nl_processing.translate_word.service import WordTranslator
|
|
105
|
+
|
|
106
|
+
db = DatabaseService(
|
|
107
|
+
user_id="alex",
|
|
108
|
+
translator=WordTranslator(
|
|
109
|
+
source_language=Language.NL,
|
|
110
|
+
target_language=Language.RU,
|
|
111
|
+
),
|
|
112
|
+
)
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Docs
|
|
116
|
+
|
|
117
|
+
- Repository module spec: [docs/module-spec.md](docs/module-spec.md)
|
|
118
|
+
- Environment variables: [docs/ENV_VARS.md](docs/ENV_VARS.md)
|
|
119
|
+
- Release workflow: [docs/REALEASE_WORKFLOW.md](docs/REALEASE_WORKFLOW.md)
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
nl_processing.egg-info/PKG-INFO
|
|
4
|
+
nl_processing.egg-info/SOURCES.txt
|
|
5
|
+
nl_processing.egg-info/dependency_links.txt
|
|
6
|
+
nl_processing.egg-info/requires.txt
|
|
7
|
+
nl_processing.egg-info/top_level.txt
|
|
8
|
+
packages/core/src/nl_processing/core/__init__.py
|
|
9
|
+
packages/core/src/nl_processing/core/exceptions.py
|
|
10
|
+
packages/core/src/nl_processing/core/models.py
|
|
11
|
+
packages/core/src/nl_processing/core/ports.py
|
|
12
|
+
packages/core/src/nl_processing/core/prompts.py
|
|
13
|
+
packages/database/src/nl_processing/database/__init__.py
|
|
14
|
+
packages/database/src/nl_processing/database/exceptions.py
|
|
15
|
+
packages/database/src/nl_processing/database/exercise_progress.py
|
|
16
|
+
packages/database/src/nl_processing/database/logging.py
|
|
17
|
+
packages/database/src/nl_processing/database/models.py
|
|
18
|
+
packages/database/src/nl_processing/database/service.py
|
|
19
|
+
packages/database/src/nl_processing/database/testing.py
|
|
20
|
+
packages/database/src/nl_processing/database/backend/__init__.py
|
|
21
|
+
packages/database/src/nl_processing/database/backend/_neon_exercise.py
|
|
22
|
+
packages/database/src/nl_processing/database/backend/_queries.py
|
|
23
|
+
packages/database/src/nl_processing/database/backend/abstract.py
|
|
24
|
+
packages/database/src/nl_processing/database/backend/neon.py
|
|
25
|
+
packages/database_cache/src/nl_processing/database_cache/__init__.py
|
|
26
|
+
packages/database_cache/src/nl_processing/database_cache/_local_store_queries.py
|
|
27
|
+
packages/database_cache/src/nl_processing/database_cache/exceptions.py
|
|
28
|
+
packages/database_cache/src/nl_processing/database_cache/local_store.py
|
|
29
|
+
packages/database_cache/src/nl_processing/database_cache/logging.py
|
|
30
|
+
packages/database_cache/src/nl_processing/database_cache/models.py
|
|
31
|
+
packages/database_cache/src/nl_processing/database_cache/service.py
|
|
32
|
+
packages/database_cache/src/nl_processing/database_cache/sync.py
|
|
33
|
+
packages/extract_text_from_image/src/nl_processing/extract_text_from_image/__init__.py
|
|
34
|
+
packages/extract_text_from_image/src/nl_processing/extract_text_from_image/benchmark.py
|
|
35
|
+
packages/extract_text_from_image/src/nl_processing/extract_text_from_image/image_encoding.py
|
|
36
|
+
packages/extract_text_from_image/src/nl_processing/extract_text_from_image/service.py
|
|
37
|
+
packages/extract_text_from_image/src/nl_processing/extract_text_from_image/prompts/generate_nl_prompt.py
|
|
38
|
+
packages/extract_text_from_image/src/nl_processing/extract_text_from_image/prompts/nl.json
|
|
39
|
+
packages/extract_words_from_text/src/nl_processing/extract_words_from_text/__init__.py
|
|
40
|
+
packages/extract_words_from_text/src/nl_processing/extract_words_from_text/service.py
|
|
41
|
+
packages/extract_words_from_text/src/nl_processing/extract_words_from_text/prompts/generate_nl_prompt.py
|
|
42
|
+
packages/extract_words_from_text/src/nl_processing/extract_words_from_text/prompts/nl.json
|
|
43
|
+
packages/sampling/src/nl_processing/sampling/__init__.py
|
|
44
|
+
packages/sampling/src/nl_processing/sampling/service.py
|
|
45
|
+
packages/translate_text/src/nl_processing/translate_text/__init__.py
|
|
46
|
+
packages/translate_text/src/nl_processing/translate_text/service.py
|
|
47
|
+
packages/translate_text/src/nl_processing/translate_text/prompts/generate_nl_ru_prompt.py
|
|
48
|
+
packages/translate_text/src/nl_processing/translate_text/prompts/nl_ru.json
|
|
49
|
+
packages/translate_word/src/nl_processing/translate_word/__init__.py
|
|
50
|
+
packages/translate_word/src/nl_processing/translate_word/service.py
|
|
51
|
+
packages/translate_word/src/nl_processing/translate_word/prompts/generate_nl_ru_prompt.py
|
|
52
|
+
packages/translate_word/src/nl_processing/translate_word/prompts/nl_ru.json
|
|
@@ -45,3 +45,22 @@ class Word(BaseModel):
|
|
|
45
45
|
normalized_form: str
|
|
46
46
|
word_type: PartOfSpeech
|
|
47
47
|
language: Language
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class WordPair(BaseModel):
|
|
51
|
+
source: Word
|
|
52
|
+
target: Word
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class ScoredWordPair(BaseModel):
|
|
56
|
+
"""Translated pair with per-exercise scores and a stable source-word ID."""
|
|
57
|
+
|
|
58
|
+
pair: WordPair
|
|
59
|
+
scores: dict[str, int]
|
|
60
|
+
source_word_id: int
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class WordPairSnapshot(ScoredWordPair):
|
|
64
|
+
"""Remote sync snapshot with stable source and target IDs for cache rebuilds."""
|
|
65
|
+
|
|
66
|
+
target_word_id: int
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from typing import Protocol, runtime_checkable
|
|
2
|
+
|
|
3
|
+
from nl_processing.core.models import ScoredWordPair, WordPairSnapshot
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@runtime_checkable
|
|
7
|
+
class ScoredPairProvider(Protocol):
|
|
8
|
+
"""Provider of score-aware word pairs for sampling or practice flows."""
|
|
9
|
+
|
|
10
|
+
async def get_word_pairs_with_scores(self) -> list[ScoredWordPair]: ...
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@runtime_checkable
|
|
14
|
+
class RemoteProgressSyncPort(Protocol):
|
|
15
|
+
"""Remote sync contract consumed by the local cache layer."""
|
|
16
|
+
|
|
17
|
+
async def export_remote_snapshot(self) -> list[WordPairSnapshot]: ...
|
|
18
|
+
|
|
19
|
+
async def apply_score_delta(
|
|
20
|
+
self,
|
|
21
|
+
event_id: str,
|
|
22
|
+
source_word_id: int,
|
|
23
|
+
exercise_type: str,
|
|
24
|
+
delta: int,
|
|
25
|
+
) -> None: ...
|
|
@@ -1,15 +1,16 @@
|
|
|
1
1
|
"""ExerciseProgressStore — per-user, per-exercise score tracking.
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
3
|
+
Default implementation of the shared scored-pair and remote-progress
|
|
4
|
+
sync contracts used by sampling and database_cache.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
7
|
import os
|
|
8
8
|
|
|
9
|
-
from nl_processing.core.models import Language, PartOfSpeech, Word
|
|
9
|
+
from nl_processing.core.models import Language, PartOfSpeech, ScoredWordPair, Word, WordPair, WordPairSnapshot
|
|
10
|
+
|
|
11
|
+
from nl_processing.database.backend.abstract import AbstractBackend
|
|
10
12
|
from nl_processing.database.backend.neon import NeonBackend
|
|
11
13
|
from nl_processing.database.exceptions import ConfigurationError
|
|
12
|
-
from nl_processing.database.models import ScoredWordPair, WordPair
|
|
13
14
|
|
|
14
15
|
_DATABASE_URL_MISSING = (
|
|
15
16
|
"DATABASE_URL environment variable is required. "
|
|
@@ -27,7 +28,7 @@ def _read_database_url() -> str:
|
|
|
27
28
|
|
|
28
29
|
|
|
29
30
|
class ExerciseProgressStore:
|
|
30
|
-
"""Per-user
|
|
31
|
+
"""Per-user progress store for score-aware reads and remote cache sync."""
|
|
31
32
|
|
|
32
33
|
def __init__(
|
|
33
34
|
self,
|
|
@@ -36,12 +37,16 @@ class ExerciseProgressStore:
|
|
|
36
37
|
source_language: Language,
|
|
37
38
|
target_language: Language,
|
|
38
39
|
exercise_types: list[str],
|
|
40
|
+
backend: AbstractBackend | None = None,
|
|
39
41
|
) -> None:
|
|
40
42
|
if not exercise_types:
|
|
41
43
|
msg = "exercise_types must be a non-empty list"
|
|
42
44
|
raise ValueError(msg)
|
|
43
|
-
|
|
44
|
-
|
|
45
|
+
if backend is None:
|
|
46
|
+
database_url = _read_database_url()
|
|
47
|
+
self._backend: AbstractBackend = NeonBackend(database_url)
|
|
48
|
+
else:
|
|
49
|
+
self._backend = backend
|
|
45
50
|
self._user_id = user_id
|
|
46
51
|
self._source_language = source_language
|
|
47
52
|
self._target_language = target_language
|
|
@@ -78,23 +83,9 @@ class ExerciseProgressStore:
|
|
|
78
83
|
|
|
79
84
|
Missing scores default to 0 (FR33).
|
|
80
85
|
"""
|
|
81
|
-
rows = await self._backend.get_user_words(
|
|
82
|
-
self._user_id,
|
|
83
|
-
self._source_language.value,
|
|
84
|
-
)
|
|
86
|
+
rows, scores_by_word = await self._get_rows_with_scores()
|
|
85
87
|
if not rows:
|
|
86
88
|
return []
|
|
87
|
-
source_word_ids = [int(row["source_id"]) for row in rows]
|
|
88
|
-
scores_by_word: dict[int, dict[str, int]] = {}
|
|
89
|
-
for et, table in self._score_tables.items():
|
|
90
|
-
score_rows = await self._backend.get_user_exercise_scores(
|
|
91
|
-
table,
|
|
92
|
-
self._user_id,
|
|
93
|
-
source_word_ids,
|
|
94
|
-
)
|
|
95
|
-
for score_row in score_rows:
|
|
96
|
-
wid = int(score_row["source_word_id"])
|
|
97
|
-
scores_by_word.setdefault(wid, {})[et] = int(score_row["score"])
|
|
98
89
|
result: list[ScoredWordPair] = []
|
|
99
90
|
for row in rows:
|
|
100
91
|
pair = self._row_to_word_pair(row)
|
|
@@ -106,9 +97,27 @@ class ExerciseProgressStore:
|
|
|
106
97
|
)
|
|
107
98
|
return result
|
|
108
99
|
|
|
109
|
-
async def export_remote_snapshot(self) -> list[
|
|
110
|
-
"""
|
|
111
|
-
|
|
100
|
+
async def export_remote_snapshot(self) -> list[WordPairSnapshot]:
|
|
101
|
+
"""Return score-aware pairs with stable remote IDs for cache consumers."""
|
|
102
|
+
rows, scores_by_word = await self._get_rows_with_scores()
|
|
103
|
+
if not rows:
|
|
104
|
+
return []
|
|
105
|
+
snapshots: list[WordPairSnapshot] = []
|
|
106
|
+
for row in rows:
|
|
107
|
+
pair = self._row_to_word_pair(row)
|
|
108
|
+
source_word_id = int(row["source_id"])
|
|
109
|
+
target_word_id = int(row["target_id"])
|
|
110
|
+
word_scores = scores_by_word.get(source_word_id, {})
|
|
111
|
+
scores = {et: word_scores.get(et, 0) for et in self._exercise_types}
|
|
112
|
+
snapshots.append(
|
|
113
|
+
WordPairSnapshot(
|
|
114
|
+
pair=pair,
|
|
115
|
+
scores=scores,
|
|
116
|
+
source_word_id=source_word_id,
|
|
117
|
+
target_word_id=target_word_id,
|
|
118
|
+
),
|
|
119
|
+
)
|
|
120
|
+
return snapshots
|
|
112
121
|
|
|
113
122
|
async def apply_score_delta(
|
|
114
123
|
self,
|
|
@@ -142,6 +151,29 @@ class ExerciseProgressStore:
|
|
|
142
151
|
msg = f"Unknown exercise_type '{exercise_type}'; expected one of {sorted(self._score_tables)}"
|
|
143
152
|
raise ValueError(msg)
|
|
144
153
|
|
|
154
|
+
async def _get_rows_with_scores(
|
|
155
|
+
self,
|
|
156
|
+
) -> tuple[list[dict[str, str | int]], dict[int, dict[str, int]]]:
|
|
157
|
+
"""Fetch translated rows and per-exercise scores for the current user."""
|
|
158
|
+
rows = await self._backend.get_user_words(
|
|
159
|
+
self._user_id,
|
|
160
|
+
self._source_language.value,
|
|
161
|
+
)
|
|
162
|
+
if not rows:
|
|
163
|
+
return [], {}
|
|
164
|
+
source_word_ids = [int(row["source_id"]) for row in rows]
|
|
165
|
+
scores_by_word: dict[int, dict[str, int]] = {}
|
|
166
|
+
for exercise_type, table in self._score_tables.items():
|
|
167
|
+
score_rows = await self._backend.get_user_exercise_scores(
|
|
168
|
+
table,
|
|
169
|
+
self._user_id,
|
|
170
|
+
source_word_ids,
|
|
171
|
+
)
|
|
172
|
+
for score_row in score_rows:
|
|
173
|
+
wid = int(score_row["source_word_id"])
|
|
174
|
+
scores_by_word.setdefault(wid, {})[exercise_type] = int(score_row["score"])
|
|
175
|
+
return rows, scores_by_word
|
|
176
|
+
|
|
145
177
|
def _word_from_row(
|
|
146
178
|
self,
|
|
147
179
|
row: dict[str, str | int],
|
{nl_processing-0.5.0 → nl_processing-1.0.2/packages/database/src}/nl_processing/database/service.py
RENAMED
|
@@ -6,13 +6,15 @@ and retrieving Word objects backed by Neon PostgreSQL.
|
|
|
6
6
|
|
|
7
7
|
import asyncio
|
|
8
8
|
import os
|
|
9
|
+
from typing import Protocol
|
|
9
10
|
|
|
10
|
-
from nl_processing.core.models import Language, PartOfSpeech, Word
|
|
11
|
+
from nl_processing.core.models import Language, PartOfSpeech, Word, WordPair
|
|
12
|
+
|
|
13
|
+
from nl_processing.database.backend.abstract import AbstractBackend
|
|
11
14
|
from nl_processing.database.backend.neon import NeonBackend
|
|
12
15
|
from nl_processing.database.exceptions import ConfigurationError
|
|
13
16
|
from nl_processing.database.logging import get_logger
|
|
14
|
-
from nl_processing.database.models import AddWordsResult
|
|
15
|
-
from nl_processing.translate_word.service import WordTranslator
|
|
17
|
+
from nl_processing.database.models import AddWordsResult
|
|
16
18
|
|
|
17
19
|
_logger = get_logger("service")
|
|
18
20
|
|
|
@@ -31,6 +33,10 @@ def _read_database_url() -> str:
|
|
|
31
33
|
raise ConfigurationError(_DATABASE_URL_MISSING) from exc
|
|
32
34
|
|
|
33
35
|
|
|
36
|
+
class WordTranslatorProtocol(Protocol):
|
|
37
|
+
async def translate(self, words: list[Word]) -> list[Word]: ...
|
|
38
|
+
|
|
39
|
+
|
|
34
40
|
class DatabaseService:
|
|
35
41
|
"""Async service for persisting and retrieving words with translations."""
|
|
36
42
|
|
|
@@ -40,13 +46,15 @@ class DatabaseService:
|
|
|
40
46
|
user_id: str,
|
|
41
47
|
source_language: Language = Language.NL,
|
|
42
48
|
target_language: Language = Language.RU,
|
|
49
|
+
backend: AbstractBackend | None = None,
|
|
50
|
+
translator: WordTranslatorProtocol | None = None,
|
|
43
51
|
) -> None:
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
52
|
+
if backend is None:
|
|
53
|
+
database_url = _read_database_url()
|
|
54
|
+
self._backend: AbstractBackend = NeonBackend(database_url)
|
|
55
|
+
else:
|
|
56
|
+
self._backend = backend
|
|
57
|
+
self._translator = translator
|
|
50
58
|
self._user_id = user_id
|
|
51
59
|
self._source_language = source_language
|
|
52
60
|
self._target_language = target_language
|
|
@@ -79,13 +87,15 @@ class DatabaseService:
|
|
|
79
87
|
new_words.append(word)
|
|
80
88
|
await self._backend.add_user_word(self._user_id, word_id, word.language.value)
|
|
81
89
|
|
|
82
|
-
if
|
|
83
|
-
|
|
90
|
+
new_source_words = [word for word in new_words if word.language == self._source_language]
|
|
91
|
+
if new_source_words and self._translator is not None:
|
|
92
|
+
asyncio.create_task(self._translate_and_store(new_source_words))
|
|
84
93
|
|
|
85
94
|
return AddWordsResult(new_words=new_words, existing_words=existing_words)
|
|
86
95
|
|
|
87
96
|
async def _translate_and_store(self, new_words: list[Word]) -> None:
|
|
88
97
|
"""Translate new words and store translations (fire-and-forget)."""
|
|
98
|
+
assert self._translator is not None
|
|
89
99
|
try:
|
|
90
100
|
translated = await self._translator.translate(new_words)
|
|
91
101
|
for source_word, target_word in zip(new_words, translated):
|