nl-processing 0.5.0__tar.gz → 1.0.2__tar.gz

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (62)
  1. nl_processing-1.0.2/PKG-INFO +119 -0
  2. nl_processing-1.0.2/README.md +105 -0
  3. nl_processing-1.0.2/nl_processing.egg-info/PKG-INFO +119 -0
  4. nl_processing-1.0.2/nl_processing.egg-info/SOURCES.txt +52 -0
  5. {nl_processing-0.5.0 → nl_processing-1.0.2}/nl_processing.egg-info/requires.txt +2 -1
  6. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/core/src}/nl_processing/core/models.py +19 -0
  7. nl_processing-1.0.2/packages/core/src/nl_processing/core/ports.py +25 -0
  8. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database/src}/nl_processing/database/exercise_progress.py +57 -25
  9. nl_processing-1.0.2/packages/database/src/nl_processing/database/models.py +7 -0
  10. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database/src}/nl_processing/database/service.py +21 -11
  11. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database_cache/src}/nl_processing/database_cache/local_store.py +3 -7
  12. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database_cache/src}/nl_processing/database_cache/service.py +11 -6
  13. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database_cache/src}/nl_processing/database_cache/sync.py +4 -3
  14. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/extract_text_from_image/src}/nl_processing/extract_text_from_image/image_encoding.py +1 -2
  15. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/extract_text_from_image/src}/nl_processing/extract_text_from_image/prompts/generate_nl_prompt.py +1 -1
  16. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/extract_words_from_text/src}/nl_processing/extract_words_from_text/prompts/generate_nl_prompt.py +1 -1
  17. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/extract_words_from_text/src}/nl_processing/extract_words_from_text/service.py +1 -2
  18. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/sampling/src}/nl_processing/sampling/service.py +3 -11
  19. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/translate_text/src}/nl_processing/translate_text/prompts/generate_nl_ru_prompt.py +1 -1
  20. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/translate_text/src}/nl_processing/translate_text/service.py +1 -2
  21. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/translate_word/src}/nl_processing/translate_word/prompts/generate_nl_ru_prompt.py +1 -1
  22. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/translate_word/src}/nl_processing/translate_word/service.py +1 -2
  23. nl_processing-1.0.2/pyproject.toml +67 -0
  24. nl_processing-0.5.0/PKG-INFO +0 -109
  25. nl_processing-0.5.0/README.md +0 -96
  26. nl_processing-0.5.0/nl_processing/core/scripts/prompt_author.py +0 -59
  27. nl_processing-0.5.0/nl_processing/database/models.py +0 -19
  28. nl_processing-0.5.0/nl_processing/translate_word/__init__.py +0 -0
  29. nl_processing-0.5.0/nl_processing.egg-info/PKG-INFO +0 -109
  30. nl_processing-0.5.0/nl_processing.egg-info/SOURCES.txt +0 -53
  31. nl_processing-0.5.0/pyproject.toml +0 -39
  32. {nl_processing-0.5.0 → nl_processing-1.0.2}/nl_processing.egg-info/dependency_links.txt +0 -0
  33. {nl_processing-0.5.0 → nl_processing-1.0.2}/nl_processing.egg-info/top_level.txt +0 -0
  34. {nl_processing-0.5.0/nl_processing → nl_processing-1.0.2/packages/core/src/nl_processing/core}/__init__.py +0 -0
  35. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/core/src}/nl_processing/core/exceptions.py +0 -0
  36. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/core/src}/nl_processing/core/prompts.py +0 -0
  37. {nl_processing-0.5.0/nl_processing/core → nl_processing-1.0.2/packages/database/src/nl_processing/database}/__init__.py +0 -0
  38. {nl_processing-0.5.0/nl_processing/database → nl_processing-1.0.2/packages/database/src/nl_processing/database/backend}/__init__.py +0 -0
  39. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database/src}/nl_processing/database/backend/_neon_exercise.py +0 -0
  40. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database/src}/nl_processing/database/backend/_queries.py +0 -0
  41. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database/src}/nl_processing/database/backend/abstract.py +0 -0
  42. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database/src}/nl_processing/database/backend/neon.py +0 -0
  43. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database/src}/nl_processing/database/exceptions.py +0 -0
  44. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database/src}/nl_processing/database/logging.py +0 -0
  45. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database/src}/nl_processing/database/testing.py +0 -0
  46. {nl_processing-0.5.0/nl_processing/database/backend → nl_processing-1.0.2/packages/database_cache/src/nl_processing/database_cache}/__init__.py +0 -0
  47. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database_cache/src}/nl_processing/database_cache/_local_store_queries.py +0 -0
  48. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database_cache/src}/nl_processing/database_cache/exceptions.py +0 -0
  49. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database_cache/src}/nl_processing/database_cache/logging.py +0 -0
  50. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/database_cache/src}/nl_processing/database_cache/models.py +0 -0
  51. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/extract_text_from_image/src}/nl_processing/extract_text_from_image/__init__.py +0 -0
  52. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/extract_text_from_image/src}/nl_processing/extract_text_from_image/benchmark.py +0 -0
  53. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/extract_text_from_image/src}/nl_processing/extract_text_from_image/prompts/nl.json +0 -0
  54. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/extract_text_from_image/src}/nl_processing/extract_text_from_image/service.py +2 -2
  55. {nl_processing-0.5.0/nl_processing/database_cache → nl_processing-1.0.2/packages/extract_words_from_text/src/nl_processing/extract_words_from_text}/__init__.py +0 -0
  56. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/extract_words_from_text/src}/nl_processing/extract_words_from_text/prompts/nl.json +0 -0
  57. {nl_processing-0.5.0/nl_processing/extract_words_from_text → nl_processing-1.0.2/packages/sampling/src/nl_processing/sampling}/__init__.py +0 -0
  58. {nl_processing-0.5.0/nl_processing/sampling → nl_processing-1.0.2/packages/translate_text/src/nl_processing/translate_text}/__init__.py +0 -0
  59. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/translate_text/src}/nl_processing/translate_text/prompts/nl_ru.json +0 -0
  60. {nl_processing-0.5.0/nl_processing/translate_text → nl_processing-1.0.2/packages/translate_word/src/nl_processing/translate_word}/__init__.py +0 -0
  61. {nl_processing-0.5.0 → nl_processing-1.0.2/packages/translate_word/src}/nl_processing/translate_word/prompts/nl_ru.json +0 -0
  62. {nl_processing-0.5.0 → nl_processing-1.0.2}/setup.cfg +0 -0
@@ -0,0 +1,119 @@
1
+ Metadata-Version: 2.4
2
+ Name: nl_processing
3
+ Version: 1.0.2
4
+ Summary: Aggregate build for the nl_processing multi-package repository
5
+ Requires-Python: >=3.12
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: pydantic<3,>=2.0
8
+ Requires-Dist: langchain-core<1,>=0.3
9
+ Requires-Dist: langchain-openai<1,>=0.3
10
+ Requires-Dist: numpy<3,>=2.0
11
+ Requires-Dist: opencv-python<5,>=4.10
12
+ Requires-Dist: asyncpg<1,>=0.30
13
+ Requires-Dist: aiosqlite<1,>=0.20
14
+
15
+ # nl_processing
16
+
17
+ [![PyPI](https://img.shields.io/pypi/v/nl_processing)](https://pypi.org/project/nl_processing/)
18
+
19
+ Dutch language processing toolkit organized as a multi-package Python repository.
20
+
21
+ ## Install
22
+
23
+ ```bash
24
+ pip install nl_processing
25
+ ```
26
+
27
+ The published `nl_processing` package is the aggregate build from the repo root. Day-to-day development happens inside the package folders under `packages/`.
28
+
29
+ ## Repository Layout
30
+
31
+ ```text
32
+ packages/
33
+ core/
34
+ extract_text_from_image/
35
+ extract_words_from_text/
36
+ translate_text/
37
+ translate_word/
38
+ database/
39
+ database_cache/
40
+ sampling/
41
+ docs/
42
+ pyproject.toml # aggregate build for the published nl_processing package
43
+ Makefile # repo-wide lint/test entrypoint
44
+ ```
45
+
46
+ Each package has its own:
47
+
48
+ - `pyproject.toml`
49
+ - `ruff.toml`
50
+ - `pytest.ini`
51
+ - `tests/`
52
+ - `docs/`
53
+
54
+ ## Modules
55
+
56
+ | Module | Class | Description | Docs |
57
+ |---|---|---|---|
58
+ | `core` | N/A | Shared models, ports, exceptions, and prompt helpers | [docs](packages/core/docs/module-spec.md) |
59
+ | `extract_text_from_image` | `ImageTextExtractor` | Extract Dutch text from images via Vision API | [docs](packages/extract_text_from_image/docs/module-spec.md) |
60
+ | `extract_words_from_text` | `WordExtractor` | Extract and normalize words from markdown text | [docs](packages/extract_words_from_text/docs/module-spec.md) |
61
+ | `translate_text` | `TextTranslator` | Translate text (NL -> RU) with markdown preservation | [docs](packages/translate_text/docs/module-spec.md) |
62
+ | `translate_word` | `WordTranslator` | Batch-translate words (NL -> RU) | [docs](packages/translate_word/docs/module-spec.md) |
63
+ | `database` | `DatabaseService` | Remote source of truth and default progress/sync provider | [docs](packages/database/docs/module-spec.md) |
64
+ | `database_cache` | `DatabaseCacheService` | Local-first SQLite cache with injectable remote progress sync | [docs](packages/database_cache/docs/module-spec.md) |
65
+ | `sampling` | `WordSampler` | Weighted word sampling over any compatible scored-pair provider | [docs](packages/sampling/docs/module-spec.md) |
66
+
67
+ ## Development
68
+
69
+ Work inside one package when you only touch one module:
70
+
71
+ ```bash
72
+ cd packages/translate_word
73
+ uv sync --all-groups
74
+ uv run pytest tests/unit
75
+ ```
76
+
77
+ Run the repo-wide quality gate from the root:
78
+
79
+ ```bash
80
+ make check
81
+ ```
82
+
83
+ Useful package-local examples:
84
+
85
+ ```bash
86
+ cd packages/core
87
+ uv run pytest tests/unit/core
88
+
89
+ cd packages/database
90
+ doppler run -- uv run pytest tests/integration/database
91
+ ```
92
+
93
+ ## Dependency Rule
94
+
95
+ Modules are independent packages. Cross-module dependencies must be explicit in the consuming package's `pyproject.toml`.
96
+
97
+ Shared cross-module storage contracts live in `nl_processing.core.ports`. `database` and `database_cache` are concrete implementations and adapters, not the owners of those shared interfaces.
98
+
99
+ One intentional design change in this layout: `database` no longer imports `translate_word` directly. If you want automatic translation on `add_words()`, compose it explicitly:
100
+
101
+ ```python
102
+ from nl_processing.core.models import Language
103
+ from nl_processing.database.service import DatabaseService
104
+ from nl_processing.translate_word.service import WordTranslator
105
+
106
+ db = DatabaseService(
107
+ user_id="alex",
108
+ translator=WordTranslator(
109
+ source_language=Language.NL,
110
+ target_language=Language.RU,
111
+ ),
112
+ )
113
+ ```
114
+
115
+ ## Docs
116
+
117
+ - Repository module spec: [docs/module-spec.md](docs/module-spec.md)
118
+ - Environment variables: [docs/ENV_VARS.md](docs/ENV_VARS.md)
119
+ - Release workflow: [docs/REALEASE_WORKFLOW.md](docs/REALEASE_WORKFLOW.md)
@@ -0,0 +1,105 @@
1
+ # nl_processing
2
+
3
+ [![PyPI](https://img.shields.io/pypi/v/nl_processing)](https://pypi.org/project/nl_processing/)
4
+
5
+ Dutch language processing toolkit organized as a multi-package Python repository.
6
+
7
+ ## Install
8
+
9
+ ```bash
10
+ pip install nl_processing
11
+ ```
12
+
13
+ The published `nl_processing` package is the aggregate build from the repo root. Day-to-day development happens inside the package folders under `packages/`.
14
+
15
+ ## Repository Layout
16
+
17
+ ```text
18
+ packages/
19
+ core/
20
+ extract_text_from_image/
21
+ extract_words_from_text/
22
+ translate_text/
23
+ translate_word/
24
+ database/
25
+ database_cache/
26
+ sampling/
27
+ docs/
28
+ pyproject.toml # aggregate build for the published nl_processing package
29
+ Makefile # repo-wide lint/test entrypoint
30
+ ```
31
+
32
+ Each package has its own:
33
+
34
+ - `pyproject.toml`
35
+ - `ruff.toml`
36
+ - `pytest.ini`
37
+ - `tests/`
38
+ - `docs/`
39
+
40
+ ## Modules
41
+
42
+ | Module | Class | Description | Docs |
43
+ |---|---|---|---|
44
+ | `core` | N/A | Shared models, ports, exceptions, and prompt helpers | [docs](packages/core/docs/module-spec.md) |
45
+ | `extract_text_from_image` | `ImageTextExtractor` | Extract Dutch text from images via Vision API | [docs](packages/extract_text_from_image/docs/module-spec.md) |
46
+ | `extract_words_from_text` | `WordExtractor` | Extract and normalize words from markdown text | [docs](packages/extract_words_from_text/docs/module-spec.md) |
47
+ | `translate_text` | `TextTranslator` | Translate text (NL -> RU) with markdown preservation | [docs](packages/translate_text/docs/module-spec.md) |
48
+ | `translate_word` | `WordTranslator` | Batch-translate words (NL -> RU) | [docs](packages/translate_word/docs/module-spec.md) |
49
+ | `database` | `DatabaseService` | Remote source of truth and default progress/sync provider | [docs](packages/database/docs/module-spec.md) |
50
+ | `database_cache` | `DatabaseCacheService` | Local-first SQLite cache with injectable remote progress sync | [docs](packages/database_cache/docs/module-spec.md) |
51
+ | `sampling` | `WordSampler` | Weighted word sampling over any compatible scored-pair provider | [docs](packages/sampling/docs/module-spec.md) |
52
+
53
+ ## Development
54
+
55
+ Work inside one package when you only touch one module:
56
+
57
+ ```bash
58
+ cd packages/translate_word
59
+ uv sync --all-groups
60
+ uv run pytest tests/unit
61
+ ```
62
+
63
+ Run the repo-wide quality gate from the root:
64
+
65
+ ```bash
66
+ make check
67
+ ```
68
+
69
+ Useful package-local examples:
70
+
71
+ ```bash
72
+ cd packages/core
73
+ uv run pytest tests/unit/core
74
+
75
+ cd packages/database
76
+ doppler run -- uv run pytest tests/integration/database
77
+ ```
78
+
79
+ ## Dependency Rule
80
+
81
+ Modules are independent packages. Cross-module dependencies must be explicit in the consuming package's `pyproject.toml`.
82
+
83
+ Shared cross-module storage contracts live in `nl_processing.core.ports`. `database` and `database_cache` are concrete implementations and adapters, not the owners of those shared interfaces.
84
+
85
+ One intentional design change in this layout: `database` no longer imports `translate_word` directly. If you want automatic translation on `add_words()`, compose it explicitly:
86
+
87
+ ```python
88
+ from nl_processing.core.models import Language
89
+ from nl_processing.database.service import DatabaseService
90
+ from nl_processing.translate_word.service import WordTranslator
91
+
92
+ db = DatabaseService(
93
+ user_id="alex",
94
+ translator=WordTranslator(
95
+ source_language=Language.NL,
96
+ target_language=Language.RU,
97
+ ),
98
+ )
99
+ ```
100
+
101
+ ## Docs
102
+
103
+ - Repository module spec: [docs/module-spec.md](docs/module-spec.md)
104
+ - Environment variables: [docs/ENV_VARS.md](docs/ENV_VARS.md)
105
+ - Release workflow: [docs/REALEASE_WORKFLOW.md](docs/REALEASE_WORKFLOW.md)
@@ -0,0 +1,119 @@
1
+ Metadata-Version: 2.4
2
+ Name: nl_processing
3
+ Version: 1.0.2
4
+ Summary: Aggregate build for the nl_processing multi-package repository
5
+ Requires-Python: >=3.12
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: pydantic<3,>=2.0
8
+ Requires-Dist: langchain-core<1,>=0.3
9
+ Requires-Dist: langchain-openai<1,>=0.3
10
+ Requires-Dist: numpy<3,>=2.0
11
+ Requires-Dist: opencv-python<5,>=4.10
12
+ Requires-Dist: asyncpg<1,>=0.30
13
+ Requires-Dist: aiosqlite<1,>=0.20
14
+
15
+ # nl_processing
16
+
17
+ [![PyPI](https://img.shields.io/pypi/v/nl_processing)](https://pypi.org/project/nl_processing/)
18
+
19
+ Dutch language processing toolkit organized as a multi-package Python repository.
20
+
21
+ ## Install
22
+
23
+ ```bash
24
+ pip install nl_processing
25
+ ```
26
+
27
+ The published `nl_processing` package is the aggregate build from the repo root. Day-to-day development happens inside the package folders under `packages/`.
28
+
29
+ ## Repository Layout
30
+
31
+ ```text
32
+ packages/
33
+ core/
34
+ extract_text_from_image/
35
+ extract_words_from_text/
36
+ translate_text/
37
+ translate_word/
38
+ database/
39
+ database_cache/
40
+ sampling/
41
+ docs/
42
+ pyproject.toml # aggregate build for the published nl_processing package
43
+ Makefile # repo-wide lint/test entrypoint
44
+ ```
45
+
46
+ Each package has its own:
47
+
48
+ - `pyproject.toml`
49
+ - `ruff.toml`
50
+ - `pytest.ini`
51
+ - `tests/`
52
+ - `docs/`
53
+
54
+ ## Modules
55
+
56
+ | Module | Class | Description | Docs |
57
+ |---|---|---|---|
58
+ | `core` | N/A | Shared models, ports, exceptions, and prompt helpers | [docs](packages/core/docs/module-spec.md) |
59
+ | `extract_text_from_image` | `ImageTextExtractor` | Extract Dutch text from images via Vision API | [docs](packages/extract_text_from_image/docs/module-spec.md) |
60
+ | `extract_words_from_text` | `WordExtractor` | Extract and normalize words from markdown text | [docs](packages/extract_words_from_text/docs/module-spec.md) |
61
+ | `translate_text` | `TextTranslator` | Translate text (NL -> RU) with markdown preservation | [docs](packages/translate_text/docs/module-spec.md) |
62
+ | `translate_word` | `WordTranslator` | Batch-translate words (NL -> RU) | [docs](packages/translate_word/docs/module-spec.md) |
63
+ | `database` | `DatabaseService` | Remote source of truth and default progress/sync provider | [docs](packages/database/docs/module-spec.md) |
64
+ | `database_cache` | `DatabaseCacheService` | Local-first SQLite cache with injectable remote progress sync | [docs](packages/database_cache/docs/module-spec.md) |
65
+ | `sampling` | `WordSampler` | Weighted word sampling over any compatible scored-pair provider | [docs](packages/sampling/docs/module-spec.md) |
66
+
67
+ ## Development
68
+
69
+ Work inside one package when you only touch one module:
70
+
71
+ ```bash
72
+ cd packages/translate_word
73
+ uv sync --all-groups
74
+ uv run pytest tests/unit
75
+ ```
76
+
77
+ Run the repo-wide quality gate from the root:
78
+
79
+ ```bash
80
+ make check
81
+ ```
82
+
83
+ Useful package-local examples:
84
+
85
+ ```bash
86
+ cd packages/core
87
+ uv run pytest tests/unit/core
88
+
89
+ cd packages/database
90
+ doppler run -- uv run pytest tests/integration/database
91
+ ```
92
+
93
+ ## Dependency Rule
94
+
95
+ Modules are independent packages. Cross-module dependencies must be explicit in the consuming package's `pyproject.toml`.
96
+
97
+ Shared cross-module storage contracts live in `nl_processing.core.ports`. `database` and `database_cache` are concrete implementations and adapters, not the owners of those shared interfaces.
98
+
99
+ One intentional design change in this layout: `database` no longer imports `translate_word` directly. If you want automatic translation on `add_words()`, compose it explicitly:
100
+
101
+ ```python
102
+ from nl_processing.core.models import Language
103
+ from nl_processing.database.service import DatabaseService
104
+ from nl_processing.translate_word.service import WordTranslator
105
+
106
+ db = DatabaseService(
107
+ user_id="alex",
108
+ translator=WordTranslator(
109
+ source_language=Language.NL,
110
+ target_language=Language.RU,
111
+ ),
112
+ )
113
+ ```
114
+
115
+ ## Docs
116
+
117
+ - Repository module spec: [docs/module-spec.md](docs/module-spec.md)
118
+ - Environment variables: [docs/ENV_VARS.md](docs/ENV_VARS.md)
119
+ - Release workflow: [docs/REALEASE_WORKFLOW.md](docs/REALEASE_WORKFLOW.md)
@@ -0,0 +1,52 @@
1
+ README.md
2
+ pyproject.toml
3
+ nl_processing.egg-info/PKG-INFO
4
+ nl_processing.egg-info/SOURCES.txt
5
+ nl_processing.egg-info/dependency_links.txt
6
+ nl_processing.egg-info/requires.txt
7
+ nl_processing.egg-info/top_level.txt
8
+ packages/core/src/nl_processing/core/__init__.py
9
+ packages/core/src/nl_processing/core/exceptions.py
10
+ packages/core/src/nl_processing/core/models.py
11
+ packages/core/src/nl_processing/core/ports.py
12
+ packages/core/src/nl_processing/core/prompts.py
13
+ packages/database/src/nl_processing/database/__init__.py
14
+ packages/database/src/nl_processing/database/exceptions.py
15
+ packages/database/src/nl_processing/database/exercise_progress.py
16
+ packages/database/src/nl_processing/database/logging.py
17
+ packages/database/src/nl_processing/database/models.py
18
+ packages/database/src/nl_processing/database/service.py
19
+ packages/database/src/nl_processing/database/testing.py
20
+ packages/database/src/nl_processing/database/backend/__init__.py
21
+ packages/database/src/nl_processing/database/backend/_neon_exercise.py
22
+ packages/database/src/nl_processing/database/backend/_queries.py
23
+ packages/database/src/nl_processing/database/backend/abstract.py
24
+ packages/database/src/nl_processing/database/backend/neon.py
25
+ packages/database_cache/src/nl_processing/database_cache/__init__.py
26
+ packages/database_cache/src/nl_processing/database_cache/_local_store_queries.py
27
+ packages/database_cache/src/nl_processing/database_cache/exceptions.py
28
+ packages/database_cache/src/nl_processing/database_cache/local_store.py
29
+ packages/database_cache/src/nl_processing/database_cache/logging.py
30
+ packages/database_cache/src/nl_processing/database_cache/models.py
31
+ packages/database_cache/src/nl_processing/database_cache/service.py
32
+ packages/database_cache/src/nl_processing/database_cache/sync.py
33
+ packages/extract_text_from_image/src/nl_processing/extract_text_from_image/__init__.py
34
+ packages/extract_text_from_image/src/nl_processing/extract_text_from_image/benchmark.py
35
+ packages/extract_text_from_image/src/nl_processing/extract_text_from_image/image_encoding.py
36
+ packages/extract_text_from_image/src/nl_processing/extract_text_from_image/service.py
37
+ packages/extract_text_from_image/src/nl_processing/extract_text_from_image/prompts/generate_nl_prompt.py
38
+ packages/extract_text_from_image/src/nl_processing/extract_text_from_image/prompts/nl.json
39
+ packages/extract_words_from_text/src/nl_processing/extract_words_from_text/__init__.py
40
+ packages/extract_words_from_text/src/nl_processing/extract_words_from_text/service.py
41
+ packages/extract_words_from_text/src/nl_processing/extract_words_from_text/prompts/generate_nl_prompt.py
42
+ packages/extract_words_from_text/src/nl_processing/extract_words_from_text/prompts/nl.json
43
+ packages/sampling/src/nl_processing/sampling/__init__.py
44
+ packages/sampling/src/nl_processing/sampling/service.py
45
+ packages/translate_text/src/nl_processing/translate_text/__init__.py
46
+ packages/translate_text/src/nl_processing/translate_text/service.py
47
+ packages/translate_text/src/nl_processing/translate_text/prompts/generate_nl_ru_prompt.py
48
+ packages/translate_text/src/nl_processing/translate_text/prompts/nl_ru.json
49
+ packages/translate_word/src/nl_processing/translate_word/__init__.py
50
+ packages/translate_word/src/nl_processing/translate_word/service.py
51
+ packages/translate_word/src/nl_processing/translate_word/prompts/generate_nl_ru_prompt.py
52
+ packages/translate_word/src/nl_processing/translate_word/prompts/nl_ru.json
@@ -1,6 +1,7 @@
1
1
  pydantic<3,>=2.0
2
- langchain<1,>=0.3
2
+ langchain-core<1,>=0.3
3
3
  langchain-openai<1,>=0.3
4
+ numpy<3,>=2.0
4
5
  opencv-python<5,>=4.10
5
6
  asyncpg<1,>=0.30
6
7
  aiosqlite<1,>=0.20
@@ -45,3 +45,22 @@ class Word(BaseModel):
45
45
  normalized_form: str
46
46
  word_type: PartOfSpeech
47
47
  language: Language
48
+
49
+
50
+ class WordPair(BaseModel):
51
+ source: Word
52
+ target: Word
53
+
54
+
55
+ class ScoredWordPair(BaseModel):
56
+ """Translated pair with per-exercise scores and a stable source-word ID."""
57
+
58
+ pair: WordPair
59
+ scores: dict[str, int]
60
+ source_word_id: int
61
+
62
+
63
+ class WordPairSnapshot(ScoredWordPair):
64
+ """Remote sync snapshot with stable source and target IDs for cache rebuilds."""
65
+
66
+ target_word_id: int
@@ -0,0 +1,25 @@
1
+ from typing import Protocol, runtime_checkable
2
+
3
+ from nl_processing.core.models import ScoredWordPair, WordPairSnapshot
4
+
5
+
6
+ @runtime_checkable
7
+ class ScoredPairProvider(Protocol):
8
+ """Provider of score-aware word pairs for sampling or practice flows."""
9
+
10
+ async def get_word_pairs_with_scores(self) -> list[ScoredWordPair]: ...
11
+
12
+
13
+ @runtime_checkable
14
+ class RemoteProgressSyncPort(Protocol):
15
+ """Remote sync contract consumed by the local cache layer."""
16
+
17
+ async def export_remote_snapshot(self) -> list[WordPairSnapshot]: ...
18
+
19
+ async def apply_score_delta(
20
+ self,
21
+ event_id: str,
22
+ source_word_id: int,
23
+ exercise_type: str,
24
+ delta: int,
25
+ ) -> None: ...
@@ -1,15 +1,16 @@
1
1
  """ExerciseProgressStore — per-user, per-exercise score tracking.
2
2
 
3
- Internal API consumed by the sampling module to determine which
4
- words to practice based on exercise-specific scores.
3
+ Default implementation of the shared scored-pair and remote-progress
4
+ sync contracts used by sampling and database_cache.
5
5
  """
6
6
 
7
7
  import os
8
8
 
9
- from nl_processing.core.models import Language, PartOfSpeech, Word
9
+ from nl_processing.core.models import Language, PartOfSpeech, ScoredWordPair, Word, WordPair, WordPairSnapshot
10
+
11
+ from nl_processing.database.backend.abstract import AbstractBackend
10
12
  from nl_processing.database.backend.neon import NeonBackend
11
13
  from nl_processing.database.exceptions import ConfigurationError
12
- from nl_processing.database.models import ScoredWordPair, WordPair
13
14
 
14
15
  _DATABASE_URL_MISSING = (
15
16
  "DATABASE_URL environment variable is required. "
@@ -27,7 +28,7 @@ def _read_database_url() -> str:
27
28
 
28
29
 
29
30
  class ExerciseProgressStore:
30
- """Per-user, per-exercise score tracking and score-aware word pair retrieval."""
31
+ """Per-user progress store for score-aware reads and remote cache sync."""
31
32
 
32
33
  def __init__(
33
34
  self,
@@ -36,12 +37,16 @@ class ExerciseProgressStore:
36
37
  source_language: Language,
37
38
  target_language: Language,
38
39
  exercise_types: list[str],
40
+ backend: AbstractBackend | None = None,
39
41
  ) -> None:
40
42
  if not exercise_types:
41
43
  msg = "exercise_types must be a non-empty list"
42
44
  raise ValueError(msg)
43
- database_url = _read_database_url()
44
- self._backend = NeonBackend(database_url)
45
+ if backend is None:
46
+ database_url = _read_database_url()
47
+ self._backend: AbstractBackend = NeonBackend(database_url)
48
+ else:
49
+ self._backend = backend
45
50
  self._user_id = user_id
46
51
  self._source_language = source_language
47
52
  self._target_language = target_language
@@ -78,23 +83,9 @@ class ExerciseProgressStore:
78
83
 
79
84
  Missing scores default to 0 (FR33).
80
85
  """
81
- rows = await self._backend.get_user_words(
82
- self._user_id,
83
- self._source_language.value,
84
- )
86
+ rows, scores_by_word = await self._get_rows_with_scores()
85
87
  if not rows:
86
88
  return []
87
- source_word_ids = [int(row["source_id"]) for row in rows]
88
- scores_by_word: dict[int, dict[str, int]] = {}
89
- for et, table in self._score_tables.items():
90
- score_rows = await self._backend.get_user_exercise_scores(
91
- table,
92
- self._user_id,
93
- source_word_ids,
94
- )
95
- for score_row in score_rows:
96
- wid = int(score_row["source_word_id"])
97
- scores_by_word.setdefault(wid, {})[et] = int(score_row["score"])
98
89
  result: list[ScoredWordPair] = []
99
90
  for row in rows:
100
91
  pair = self._row_to_word_pair(row)
@@ -106,9 +97,27 @@ class ExerciseProgressStore:
106
97
  )
107
98
  return result
108
99
 
109
- async def export_remote_snapshot(self) -> list[ScoredWordPair]:
110
- """Thin wrapper around get_word_pairs_with_scores for cache consumers."""
111
- return await self.get_word_pairs_with_scores()
100
+ async def export_remote_snapshot(self) -> list[WordPairSnapshot]:
101
+ """Return score-aware pairs with stable remote IDs for cache consumers."""
102
+ rows, scores_by_word = await self._get_rows_with_scores()
103
+ if not rows:
104
+ return []
105
+ snapshots: list[WordPairSnapshot] = []
106
+ for row in rows:
107
+ pair = self._row_to_word_pair(row)
108
+ source_word_id = int(row["source_id"])
109
+ target_word_id = int(row["target_id"])
110
+ word_scores = scores_by_word.get(source_word_id, {})
111
+ scores = {et: word_scores.get(et, 0) for et in self._exercise_types}
112
+ snapshots.append(
113
+ WordPairSnapshot(
114
+ pair=pair,
115
+ scores=scores,
116
+ source_word_id=source_word_id,
117
+ target_word_id=target_word_id,
118
+ ),
119
+ )
120
+ return snapshots
112
121
 
113
122
  async def apply_score_delta(
114
123
  self,
@@ -142,6 +151,29 @@ class ExerciseProgressStore:
142
151
  msg = f"Unknown exercise_type '{exercise_type}'; expected one of {sorted(self._score_tables)}"
143
152
  raise ValueError(msg)
144
153
 
154
+ async def _get_rows_with_scores(
155
+ self,
156
+ ) -> tuple[list[dict[str, str | int]], dict[int, dict[str, int]]]:
157
+ """Fetch translated rows and per-exercise scores for the current user."""
158
+ rows = await self._backend.get_user_words(
159
+ self._user_id,
160
+ self._source_language.value,
161
+ )
162
+ if not rows:
163
+ return [], {}
164
+ source_word_ids = [int(row["source_id"]) for row in rows]
165
+ scores_by_word: dict[int, dict[str, int]] = {}
166
+ for exercise_type, table in self._score_tables.items():
167
+ score_rows = await self._backend.get_user_exercise_scores(
168
+ table,
169
+ self._user_id,
170
+ source_word_ids,
171
+ )
172
+ for score_row in score_rows:
173
+ wid = int(score_row["source_word_id"])
174
+ scores_by_word.setdefault(wid, {})[exercise_type] = int(score_row["score"])
175
+ return rows, scores_by_word
176
+
145
177
  def _word_from_row(
146
178
  self,
147
179
  row: dict[str, str | int],
@@ -0,0 +1,7 @@
1
+ from nl_processing.core.models import Word
2
+ from pydantic import BaseModel
3
+
4
+
5
+ class AddWordsResult(BaseModel):
6
+ new_words: list[Word]
7
+ existing_words: list[Word]
@@ -6,13 +6,15 @@ and retrieving Word objects backed by Neon PostgreSQL.
6
6
 
7
7
  import asyncio
8
8
  import os
9
+ from typing import Protocol
9
10
 
10
- from nl_processing.core.models import Language, PartOfSpeech, Word
11
+ from nl_processing.core.models import Language, PartOfSpeech, Word, WordPair
12
+
13
+ from nl_processing.database.backend.abstract import AbstractBackend
11
14
  from nl_processing.database.backend.neon import NeonBackend
12
15
  from nl_processing.database.exceptions import ConfigurationError
13
16
  from nl_processing.database.logging import get_logger
14
- from nl_processing.database.models import AddWordsResult, WordPair
15
- from nl_processing.translate_word.service import WordTranslator
17
+ from nl_processing.database.models import AddWordsResult
16
18
 
17
19
  _logger = get_logger("service")
18
20
 
@@ -31,6 +33,10 @@ def _read_database_url() -> str:
31
33
  raise ConfigurationError(_DATABASE_URL_MISSING) from exc
32
34
 
33
35
 
36
+ class WordTranslatorProtocol(Protocol):
37
+ async def translate(self, words: list[Word]) -> list[Word]: ...
38
+
39
+
34
40
  class DatabaseService:
35
41
  """Async service for persisting and retrieving words with translations."""
36
42
 
@@ -40,13 +46,15 @@ class DatabaseService:
40
46
  user_id: str,
41
47
  source_language: Language = Language.NL,
42
48
  target_language: Language = Language.RU,
49
+ backend: AbstractBackend | None = None,
50
+ translator: WordTranslatorProtocol | None = None,
43
51
  ) -> None:
44
- database_url = _read_database_url()
45
- self._backend = NeonBackend(database_url)
46
- self._translator = WordTranslator(
47
- source_language=source_language,
48
- target_language=target_language,
49
- )
52
+ if backend is None:
53
+ database_url = _read_database_url()
54
+ self._backend: AbstractBackend = NeonBackend(database_url)
55
+ else:
56
+ self._backend = backend
57
+ self._translator = translator
50
58
  self._user_id = user_id
51
59
  self._source_language = source_language
52
60
  self._target_language = target_language
@@ -79,13 +87,15 @@ class DatabaseService:
79
87
  new_words.append(word)
80
88
  await self._backend.add_user_word(self._user_id, word_id, word.language.value)
81
89
 
82
- if new_words:
83
- asyncio.create_task(self._translate_and_store(new_words))
90
+ new_source_words = [word for word in new_words if word.language == self._source_language]
91
+ if new_source_words and self._translator is not None:
92
+ asyncio.create_task(self._translate_and_store(new_source_words))
84
93
 
85
94
  return AddWordsResult(new_words=new_words, existing_words=existing_words)
86
95
 
87
96
  async def _translate_and_store(self, new_words: list[Word]) -> None:
88
97
  """Translate new words and store translations (fire-and-forget)."""
98
+ assert self._translator is not None
89
99
  try:
90
100
  translated = await self._translator.translate(new_words)
91
101
  for source_word, target_word in zip(new_words, translated):