grammared-language 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- grammared_language-0.1.0/LICENSE.md +5 -0
- grammared_language-0.1.0/PKG-INFO +226 -0
- grammared_language-0.1.0/README.md +149 -0
- grammared_language-0.1.0/grammared_language/__init__.py +22 -0
- grammared_language-0.1.0/grammared_language/api/__init__.py +20 -0
- grammared_language-0.1.0/grammared_language/api/generate_grpc.py +84 -0
- grammared_language-0.1.0/grammared_language/api/grpc_gen/__init__.py +19 -0
- grammared_language-0.1.0/grammared_language/api/grpc_gen/ml_server_pb2.py +83 -0
- grammared_language-0.1.0/grammared_language/api/grpc_gen/ml_server_pb2.pyi +255 -0
- grammared_language-0.1.0/grammared_language/api/grpc_gen/ml_server_pb2_grpc.py +330 -0
- grammared_language-0.1.0/grammared_language/api/grpc_gen/py.typed +0 -0
- grammared_language-0.1.0/grammared_language/api/util.py +30 -0
- grammared_language-0.1.0/grammared_language/clients/__init__.py +20 -0
- grammared_language-0.1.0/grammared_language/clients/async_multi_client.py +270 -0
- grammared_language-0.1.0/grammared_language/clients/base_client.py +59 -0
- grammared_language-0.1.0/grammared_language/clients/coedit_client.py +70 -0
- grammared_language-0.1.0/grammared_language/clients/gector_client.py +81 -0
- grammared_language-0.1.0/grammared_language/clients/grammar_classification_client.py +212 -0
- grammared_language-0.1.0/grammared_language/clients/text2text_base_client.py +180 -0
- grammared_language-0.1.0/grammared_language/grammared_classifier/__init__.py +21 -0
- grammared_language-0.1.0/grammared_language/grammared_classifier/classifier_pipeline.py +240 -0
- grammared_language-0.1.0/grammared_language/grammared_classifier/grammared_classifier_model.py +52 -0
- grammared_language-0.1.0/grammared_language/language_tool/__init__.py +20 -0
- grammared_language-0.1.0/grammared_language/language_tool/output_models.py +114 -0
- grammared_language-0.1.0/grammared_language/triton/__init__.py +31 -0
- grammared_language-0.1.0/grammared_language/triton/builder/__init__.py +21 -0
- grammared_language-0.1.0/grammared_language/triton/builder/data_model.py +55 -0
- grammared_language-0.1.0/grammared_language/triton/builder/repo_builder.py +78 -0
- grammared_language-0.1.0/grammared_language/triton/builder/triton_templates/__init__.py +0 -0
- grammared_language-0.1.0/grammared_language/triton/builder/triton_templates/gector.config.pbtxt.jinja +70 -0
- grammared_language-0.1.0/grammared_language/triton/builder/triton_templates/gector.model.py.jinja +9 -0
- grammared_language-0.1.0/grammared_language/triton/builder/triton_templates/grammared_classifier.config.pbtxt.jinja +61 -0
- grammared_language-0.1.0/grammared_language/triton/builder/triton_templates/grammared_classifier.model.py.jinja +4 -0
- grammared_language-0.1.0/grammared_language/triton/builder/triton_templates/text2text.config.pbtxt.jinja +58 -0
- grammared_language-0.1.0/grammared_language/triton/builder/triton_templates/text2text.model.py.jinja +9 -0
- grammared_language-0.1.0/grammared_language/triton/triton_gector_model.py +151 -0
- grammared_language-0.1.0/grammared_language/triton/triton_grammared_classifier_model.py +217 -0
- grammared_language-0.1.0/grammared_language/triton/triton_text2text_model.py +38 -0
- grammared_language-0.1.0/grammared_language/triton/triton_transformers_model.py +190 -0
- grammared_language-0.1.0/grammared_language/utils/__init__.py +21 -0
- grammared_language-0.1.0/grammared_language/utils/config_parser.py +352 -0
- grammared_language-0.1.0/grammared_language/utils/errant_grammar_correction_extractor.py +264 -0
- grammared_language-0.1.0/grammared_language/utils/grammar_correction_extractor.py +380 -0
- grammared_language-0.1.0/grammared_language.egg-info/PKG-INFO +226 -0
- grammared_language-0.1.0/grammared_language.egg-info/SOURCES.txt +48 -0
- grammared_language-0.1.0/grammared_language.egg-info/dependency_links.txt +1 -0
- grammared_language-0.1.0/grammared_language.egg-info/requires.txt +67 -0
- grammared_language-0.1.0/grammared_language.egg-info/top_level.txt +1 -0
- grammared_language-0.1.0/pyproject.toml +132 -0
- grammared_language-0.1.0/setup.cfg +4 -0
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: grammared-language
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Grammar error correction backend with Triton and FastAPI/gRPC integration
|
|
5
|
+
Author-email: Ray Liu <ray@rayliu.ca>
|
|
6
|
+
License: # License
|
|
7
|
+
|
|
8
|
+
To be determined by the repository owner.
|
|
9
|
+
|
|
10
|
+
Please contact the repository maintainer for licensing information.
|
|
11
|
+
|
|
12
|
+
Requires-Python: >=3.11
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
License-File: LICENSE.md
|
|
15
|
+
Requires-Dist: pip>=25.3
|
|
16
|
+
Requires-Dist: pydantic
|
|
17
|
+
Requires-Dist: openai
|
|
18
|
+
Requires-Dist: jinja2>=3.1.6
|
|
19
|
+
Requires-Dist: transformers>=4.30.0
|
|
20
|
+
Requires-Dist: scikit-learn
|
|
21
|
+
Requires-Dist: tritonclient[grpc]>=2.60.0
|
|
22
|
+
Requires-Dist: gector-triton
|
|
23
|
+
Requires-Dist: fastapi>=0.128.0
|
|
24
|
+
Requires-Dist: errant>=3.0.0
|
|
25
|
+
Requires-Dist: uvicorn
|
|
26
|
+
Requires-Dist: grpcio
|
|
27
|
+
Requires-Dist: grpcio-tools
|
|
28
|
+
Requires-Dist: protobuf>=5.29.5
|
|
29
|
+
Requires-Dist: torch
|
|
30
|
+
Requires-Dist: numpy
|
|
31
|
+
Requires-Dist: sentencepiece>=0.2.1
|
|
32
|
+
Provides-Extra: docker
|
|
33
|
+
Requires-Dist: pydantic==2.12.5; extra == "docker"
|
|
34
|
+
Requires-Dist: openai==2.15.0; extra == "docker"
|
|
35
|
+
Requires-Dist: jinja2==3.1.6; extra == "docker"
|
|
36
|
+
Requires-Dist: transformers==4.57.6; extra == "docker"
|
|
37
|
+
Requires-Dist: scikit-learn==1.8.0; extra == "docker"
|
|
38
|
+
Requires-Dist: tritonclient[grpc]==2.64.0; extra == "docker"
|
|
39
|
+
Requires-Dist: gector-triton; extra == "docker"
|
|
40
|
+
Requires-Dist: fastapi==0.128.0; extra == "docker"
|
|
41
|
+
Requires-Dist: errant==3.0.0; extra == "docker"
|
|
42
|
+
Requires-Dist: spacy==3.8.11; extra == "docker"
|
|
43
|
+
Requires-Dist: uvicorn==0.40.0; extra == "docker"
|
|
44
|
+
Requires-Dist: grpcio==1.67.1; extra == "docker"
|
|
45
|
+
Requires-Dist: grpcio-tools==1.67.1; extra == "docker"
|
|
46
|
+
Requires-Dist: numpy==1.26.4; extra == "docker"
|
|
47
|
+
Provides-Extra: triton
|
|
48
|
+
Requires-Dist: torch; extra == "triton"
|
|
49
|
+
Requires-Dist: transformers[torch]==4.57.6; extra == "triton"
|
|
50
|
+
Requires-Dist: optimum[onnxruntime-gpu]; extra == "triton"
|
|
51
|
+
Requires-Dist: bitsandbytes; extra == "triton"
|
|
52
|
+
Requires-Dist: accelerate; extra == "triton"
|
|
53
|
+
Provides-Extra: triton-arm
|
|
54
|
+
Requires-Dist: transformers[torch]==4.57.6; extra == "triton-arm"
|
|
55
|
+
Requires-Dist: optimum[onnxruntime]; extra == "triton-arm"
|
|
56
|
+
Requires-Dist: bitsandbytes; extra == "triton-arm"
|
|
57
|
+
Requires-Dist: accelerate; extra == "triton-arm"
|
|
58
|
+
Requires-Dist: torch==2.10.0+cpu; extra == "triton-arm"
|
|
59
|
+
Provides-Extra: dev
|
|
60
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
61
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
|
|
62
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
63
|
+
Requires-Dist: pytest-mock>=3.10.0; extra == "dev"
|
|
64
|
+
Requires-Dist: black>=23.0.0; extra == "dev"
|
|
65
|
+
Requires-Dist: isort>=5.12.0; extra == "dev"
|
|
66
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
67
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
68
|
+
Requires-Dist: pre-commit>=3.0.0; extra == "dev"
|
|
69
|
+
Provides-Extra: test
|
|
70
|
+
Requires-Dist: pytest>=7.0.0; extra == "test"
|
|
71
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == "test"
|
|
72
|
+
Requires-Dist: pytest-mock>=3.10.0; extra == "test"
|
|
73
|
+
Requires-Dist: requests>=2.28.0; extra == "test"
|
|
74
|
+
Provides-Extra: all
|
|
75
|
+
Requires-Dist: grammared-language[dev,test,triton]; extra == "all"
|
|
76
|
+
Dynamic: license-file
|
|
77
|
+
|
|
78
|
+
# Grammared Language
|
|
79
|
+
|
|
80
|
+
Adding Grammarly (and other) open source models to LanguageTool
|
|
81
|
+
|
|
82
|
+
## Demo
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+

|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
Demo server: [https://grammared-language-demo.rayliu.ca/v2](https://grammared-language-demo.rayliu.ca/v2)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
> **Warning:** Demo server is hosted on an Oracle ARM CPU server. It may be slow!
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
## Overview
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
## Limitations
|
|
98
|
+
- The correction will always show up as grammar corrections
|
|
99
|
+
- LanguageTool does not use the correction categories supplied by the remote servers
|
|
100
|
+
- No paraphrasing support
|
|
101
|
+
- LanguageTool clients send requests to a hard-coded rewrite server URL
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
## Supported Models
|
|
105
|
+
- `GECToR` models from [gotutiyan/gector](https://github.com/gotutiyan/gector)
|
|
106
|
+
- `text2text-generation` models
|
|
107
|
+
- for example, Grammarly's [CoEdIT](https://huggingface.co/collections/grammarly/coedit) models
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
## Quick Start
|
|
111
|
+
|
|
112
|
+
### Model Config
|
|
113
|
+
|
|
114
|
+
#### Config File
|
|
115
|
+
|
|
116
|
+
See `model_config.yaml` or `docker/default_model_config.yaml` as a template:
|
|
117
|
+
|
|
118
|
+
```yaml
|
|
119
|
+
gector_deberta_large:
|
|
120
|
+
type: gector
|
|
121
|
+
backend: triton
|
|
122
|
+
serving_config:
|
|
123
|
+
triton_host: triton-server
|
|
124
|
+
triton_port: 8001
|
|
125
|
+
pretrained_model_name_or_path: "gotutiyan/gector-deberta-large-5k"
|
|
126
|
+
triton_model_name: gector_deberta_large
|
|
127
|
+
device: cuda # cpu, cuda, or auto
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
#### Environment Variable
|
|
134
|
+
|
|
135
|
+
Or you can also set via environment variables (see `demo-docker-compose.yml` for real-world examples):
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
```
|
|
139
|
+
# See the 'environment:' section in demo-docker-compose.yml for full model config via env vars
|
|
140
|
+
# Example:
|
|
141
|
+
GRAMMARED_LANGUAGE__MODELS__GECTOR_DEBERTA_LARGE__TYPE=gector
|
|
142
|
+
GRAMMARED_LANGUAGE__MODELS__GECTOR_DEBERTA_LARGE__BACKEND=triton
|
|
143
|
+
GRAMMARED_LANGUAGE__MODELS__GECTOR_DEBERTA_LARGE__SERVING_CONFIG__TRITON_HOST=triton-server
|
|
144
|
+
...
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
For more, see the comments in `grammared_language/utils/config_parser.py`.
|
|
149
|
+
|
|
150
|
+
### LanguageTool
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
To enable remote servers with LanguageTool we will need a remote rule config file, which can be enabled via the `remoteRulesFile` option in the `server.properties` file.
|
|
154
|
+
|
|
155
|
+
---
|
|
156
|
+
|
|
157
|
+
## How config loading works
|
|
158
|
+
|
|
159
|
+
When the service starts, it loads model configuration in this order:
|
|
160
|
+
|
|
161
|
+
1. If a config file exists at `/model_config.yaml`, it loads that.
|
|
162
|
+
2. If not, and environment variables starting with `GRAMMARED_LANGUAGE__` are set, it loads config from those (see `demo-docker-compose.yml`).
|
|
163
|
+
3. If neither is found, it falls back to `/default_model_config.yaml`.
|
|
164
|
+
|
|
165
|
+
See the `get_config` function in `grammared_language/utils/config_parser.py` for details.
|
|
166
|
+
|
|
167
|
+
**Start everything with Docker Compose:**
|
|
168
|
+
```bash
|
|
169
|
+
docker-compose up --build
|
|
170
|
+
```
|
|
171
|
+
This runs Triton, the API, and (optionally) LanguageTool with remote rules.
|
|
172
|
+
|
|
173
|
+
---
|
|
174
|
+
|
|
175
|
+
## Requirements
|
|
176
|
+
|
|
177
|
+
- Python 3.11+
|
|
178
|
+
- Docker (for containers)
|
|
179
|
+
- [Triton Inference Server](https://github.com/triton-inference-server/server) (for model serving)
|
|
180
|
+
- See `pyproject.toml` for details
|
|
181
|
+
|
|
182
|
+
## Usage
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
### LanguageTool Integration
|
|
186
|
+
|
|
187
|
+
To use remote servers with LanguageTool, set up a remote rule config file (see `example_language_tool_configs/remote-rule-config.json`) and add this to your `server.properties`:
|
|
188
|
+
|
|
189
|
+
```
|
|
190
|
+
remoteRulesFile=./remote-rule-config.json
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
Then run:
|
|
194
|
+
|
|
195
|
+
```
|
|
196
|
+
java -cp languagetool-server.jar org.languagetool.server.HTTPServer --config server.properties
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
#### With Dockerized LanguageTool
|
|
200
|
+
|
|
201
|
+
If you're using the `meyay/languagetool` or `erikvl87/languagetool` Docker images, set:
|
|
202
|
+
|
|
203
|
+
```
|
|
204
|
+
langtool_remoteRulesFile=<remote file config path in docker>
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
See `docker-compose.yml` for a full example.
|
|
208
|
+
|
|
209
|
+
## Troubleshooting
|
|
210
|
+
|
|
211
|
+
- For model loading or inference errors, check Triton and API logs
|
|
212
|
+
- For LanguageTool integration, make sure your remote rule config is correct and accessible
|
|
213
|
+
|
|
214
|
+
## License
|
|
215
|
+
|
|
216
|
+
See [LICENSE.md](LICENSE.md).
|
|
217
|
+
|
|
218
|
+
---
|
|
219
|
+
|
|
220
|
+
## Credits & References
|
|
221
|
+
|
|
222
|
+
- [Ray Liu](https://github.com/rayliuca) (author/maintainer)
|
|
223
|
+
- [GECToR: Grammatical Error Correction: Tag, Not Rewrite](https://github.com/gotutiyan/gector)
|
|
224
|
+
- [Grammarly CoEdIT models](https://huggingface.co/collections/grammarly/coedit)
|
|
225
|
+
- [Triton Inference Server](https://github.com/triton-inference-server/server)
|
|
226
|
+
- [LanguageTool](https://languagetool.org/)
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
# Grammared Language
|
|
2
|
+
|
|
3
|
+
Adding Grammarly (and other) open source models to LanguageTool
|
|
4
|
+
|
|
5
|
+
## Demo
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+

|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
Demo server: [https://grammared-language-demo.rayliu.ca/v2](https://grammared-language-demo.rayliu.ca/v2)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
> **Warning:** Demo server is hosted on an Oracle ARM CPU server. It may be slow!
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
## Overview
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
## Limitations
|
|
21
|
+
- The correction will always show up as grammar corrections
|
|
22
|
+
- LanguageTool does not use the correction categories supplied by the remote servers
|
|
23
|
+
- No paraphrasing support
|
|
24
|
+
- LanguageTool clients send requests to a hard-coded rewrite server URL
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
## Supported Models
|
|
28
|
+
- `GECToR` models from [gotutiyan/gector](https://github.com/gotutiyan/gector)
|
|
29
|
+
- `text2text-generation` models
|
|
30
|
+
- for example, Grammarly's [CoEdIT](https://huggingface.co/collections/grammarly/coedit) models
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
## Quick Start
|
|
34
|
+
|
|
35
|
+
### Model Config
|
|
36
|
+
|
|
37
|
+
#### Config File
|
|
38
|
+
|
|
39
|
+
See `model_config.yaml` or `docker/default_model_config.yaml` as a template:
|
|
40
|
+
|
|
41
|
+
```yaml
|
|
42
|
+
gector_deberta_large:
|
|
43
|
+
type: gector
|
|
44
|
+
backend: triton
|
|
45
|
+
serving_config:
|
|
46
|
+
triton_host: triton-server
|
|
47
|
+
triton_port: 8001
|
|
48
|
+
pretrained_model_name_or_path: "gotutiyan/gector-deberta-large-5k"
|
|
49
|
+
triton_model_name: gector_deberta_large
|
|
50
|
+
device: cuda # cpu, cuda, or auto
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
#### Environment Variable
|
|
57
|
+
|
|
58
|
+
Or you can also set via environment variables (see `demo-docker-compose.yml` for real-world examples):
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
```
|
|
62
|
+
# See the 'environment:' section in demo-docker-compose.yml for full model config via env vars
|
|
63
|
+
# Example:
|
|
64
|
+
GRAMMARED_LANGUAGE__MODELS__GECTOR_DEBERTA_LARGE__TYPE=gector
|
|
65
|
+
GRAMMARED_LANGUAGE__MODELS__GECTOR_DEBERTA_LARGE__BACKEND=triton
|
|
66
|
+
GRAMMARED_LANGUAGE__MODELS__GECTOR_DEBERTA_LARGE__SERVING_CONFIG__TRITON_HOST=triton-server
|
|
67
|
+
...
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
For more, see the comments in `grammared_language/utils/config_parser.py`.
|
|
72
|
+
|
|
73
|
+
### LanguageTool
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
To enable remote servers with LanguageTool we will need a remote rule config file, which can be enabled via the `remoteRulesFile` option in the `server.properties` file.
|
|
77
|
+
|
|
78
|
+
---
|
|
79
|
+
|
|
80
|
+
## How config loading works
|
|
81
|
+
|
|
82
|
+
When the service starts, it loads model configuration in this order:
|
|
83
|
+
|
|
84
|
+
1. If a config file exists at `/model_config.yaml`, it loads that.
|
|
85
|
+
2. If not, and environment variables starting with `GRAMMARED_LANGUAGE__` are set, it loads config from those (see `demo-docker-compose.yml`).
|
|
86
|
+
3. If neither is found, it falls back to `/default_model_config.yaml`.
|
|
87
|
+
|
|
88
|
+
See the `get_config` function in `grammared_language/utils/config_parser.py` for details.
|
|
89
|
+
|
|
90
|
+
**Start everything with Docker Compose:**
|
|
91
|
+
```bash
|
|
92
|
+
docker-compose up --build
|
|
93
|
+
```
|
|
94
|
+
This runs Triton, the API, and (optionally) LanguageTool with remote rules.
|
|
95
|
+
|
|
96
|
+
---
|
|
97
|
+
|
|
98
|
+
## Requirements
|
|
99
|
+
|
|
100
|
+
- Python 3.11+
|
|
101
|
+
- Docker (for containers)
|
|
102
|
+
- [Triton Inference Server](https://github.com/triton-inference-server/server) (for model serving)
|
|
103
|
+
- See `pyproject.toml` for details
|
|
104
|
+
|
|
105
|
+
## Usage
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
### LanguageTool Integration
|
|
109
|
+
|
|
110
|
+
To use remote servers with LanguageTool, set up a remote rule config file (see `example_language_tool_configs/remote-rule-config.json`) and add this to your `server.properties`:
|
|
111
|
+
|
|
112
|
+
```
|
|
113
|
+
remoteRulesFile=./remote-rule-config.json
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
Then run:
|
|
117
|
+
|
|
118
|
+
```
|
|
119
|
+
java -cp languagetool-server.jar org.languagetool.server.HTTPServer --config server.properties
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
#### With Dockerized LanguageTool
|
|
123
|
+
|
|
124
|
+
If you're using the `meyay/languagetool` or `erikvl87/languagetool` Docker images, set:
|
|
125
|
+
|
|
126
|
+
```
|
|
127
|
+
langtool_remoteRulesFile=<remote file config path in docker>
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
See `docker-compose.yml` for a full example.
|
|
131
|
+
|
|
132
|
+
## Troubleshooting
|
|
133
|
+
|
|
134
|
+
- For model loading or inference errors, check Triton and API logs
|
|
135
|
+
- For LanguageTool integration, make sure your remote rule config is correct and accessible
|
|
136
|
+
|
|
137
|
+
## License
|
|
138
|
+
|
|
139
|
+
See [LICENSE.md](LICENSE.md).
|
|
140
|
+
|
|
141
|
+
---
|
|
142
|
+
|
|
143
|
+
## Credits & References
|
|
144
|
+
|
|
145
|
+
- [Ray Liu](https://github.com/rayliuca) (author/maintainer)
|
|
146
|
+
- [GECToR: Grammatical Error Correction: Tag, Not Rewrite](https://github.com/gotutiyan/gector)
|
|
147
|
+
- [Grammarly CoEdIT models](https://huggingface.co/collections/grammarly/coedit)
|
|
148
|
+
- [Triton Inference Server](https://github.com/triton-inference-server/server)
|
|
149
|
+
- [LanguageTool](https://languagetool.org/)
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""Grammared Language - Grammar correction tools and models."""
|
|
2
|
+
|
|
3
|
+
import importlib
|
|
4
|
+
import pkgutil
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
__version__ = "0.1.0"
|
|
8
|
+
|
|
9
|
+
# Programmatically discover and import all submodules
|
|
10
|
+
__all__ = ["__version__"]
|
|
11
|
+
_module_path = Path(__file__).parent
|
|
12
|
+
|
|
13
|
+
# Import subpackages
|
|
14
|
+
for _, module_name, is_pkg in pkgutil.iter_modules([str(_module_path)]):
|
|
15
|
+
if not module_name.startswith('_') and is_pkg:
|
|
16
|
+
try:
|
|
17
|
+
module = importlib.import_module(f'.{module_name}', package=__name__)
|
|
18
|
+
globals()[module_name] = module
|
|
19
|
+
__all__.append(module_name)
|
|
20
|
+
except ImportError as e:
|
|
21
|
+
# Skip modules that have missing dependencies
|
|
22
|
+
pass
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""API package for the grammared_language project."""
|
|
2
|
+
import importlib
|
|
3
|
+
import pkgutil
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
# Programmatically discover and import all modules
|
|
7
|
+
__all__ = []
|
|
8
|
+
_module_path = Path(__file__).parent
|
|
9
|
+
|
|
10
|
+
for _, module_name, _ in pkgutil.iter_modules([str(_module_path)]):
|
|
11
|
+
if not module_name.startswith('_'):
|
|
12
|
+
module = importlib.import_module(f'.{module_name}', package=__name__)
|
|
13
|
+
# Export all public attributes from the module
|
|
14
|
+
for attr_name in dir(module):
|
|
15
|
+
if not attr_name.startswith('_'):
|
|
16
|
+
attr = getattr(module, attr_name)
|
|
17
|
+
# Only export classes and functions defined in this module
|
|
18
|
+
if hasattr(attr, '__module__') and attr.__module__.startswith(__name__):
|
|
19
|
+
globals()[attr_name] = attr
|
|
20
|
+
__all__.append(attr_name)
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
"""
|
|
3
|
+
Script to regenerate gRPC code from ml_server.proto.
|
|
4
|
+
|
|
5
|
+
Usage:
|
|
6
|
+
python generate_grpc.py
|
|
7
|
+
|
|
8
|
+
This regenerates the protobuf and gRPC Python code from the LanguageTool
|
|
9
|
+
ml_server.proto definition.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import subprocess
|
|
13
|
+
import sys
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
import re
|
|
16
|
+
|
|
17
|
+
def fix_relative_imports(grpc_gen_dir):
    """Convert absolute imports to relative imports in generated files."""
    for file_path in grpc_gen_dir.glob("*.py"):
        original = file_path.read_text()
        # protoc emits "import ml_server_pb2 as ..." which only resolves when
        # the generated directory is on sys.path; rewrite it as a
        # package-relative import instead.
        fixed = re.sub(
            r'^import ml_server_pb2 as',
            'from . import ml_server_pb2 as',
            original,
            flags=re.MULTILINE,
        )
        if fixed != original:
            file_path.write_text(fixed)
            print(f"✓ Fixed imports in {file_path.name}")
|
|
33
|
+
|
|
34
|
+
def regenerate_grpc():
    """Regenerate gRPC code from ml_server.proto.

    Invokes ``grpc_tools.protoc`` to produce the ``*_pb2.py``, ``*_pb2.pyi``
    and ``*_pb2_grpc.py`` files, then rewrites their imports to be
    package-relative via :func:`fix_relative_imports`.

    Returns:
        bool: True on success; False if the proto file is missing, protoc
        fails, or grpc_tools is not installed.
    """
    # NOTE(review): paths are resolved under ../language_tool, but the
    # generated code ships in api/grpc_gen — confirm this location is
    # intentional and that ml_server.proto actually lives there.
    src_dir = Path(__file__).parent.parent / "language_tool"
    grpc_gen_dir = src_dir / "grpc_gen"
    proto_file = src_dir / "ml_server.proto"

    if not proto_file.exists():
        print(f"Error: {proto_file} not found")
        return False

    if not grpc_gen_dir.exists():
        grpc_gen_dir.mkdir(parents=True, exist_ok=True)
        print(f"Created directory: {grpc_gen_dir}")

    print(f"Regenerating gRPC code from {proto_file}...")
    print(f"Output directory: {grpc_gen_dir}")

    cmd = [
        sys.executable, "-m", "grpc_tools.protoc",
        f"-I{src_dir}",
        f"--python_out={grpc_gen_dir}",
        f"--pyi_out={grpc_gen_dir}",
        f"--grpc_python_out={grpc_gen_dir}",
        str(proto_file),
    ]

    try:
        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
        print("✓ gRPC code regenerated successfully")

        # protoc emits absolute imports; make them package-relative.
        fix_relative_imports(grpc_gen_dir)

        if result.stdout:
            print(result.stdout)
        return True
    except subprocess.CalledProcessError as e:
        # Plain literal: the original used an f-string with no placeholders.
        print("✗ Error regenerating gRPC code:")
        print(f"Command: {' '.join(cmd)}")
        if e.stdout:
            print("STDOUT:", e.stdout)
        if e.stderr:
            print("STDERR:", e.stderr)
        return False
    except FileNotFoundError:
        print("✗ grpc_tools.protoc not found. Install with: pip install grpcio-tools")
        return False
|
|
81
|
+
|
|
82
|
+
if __name__ == "__main__":
|
|
83
|
+
success = regenerate_grpc()
|
|
84
|
+
sys.exit(0 if success else 1)
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Auto-generated gRPC protocol buffer code from LanguageTool's ml_server.proto.
|
|
3
|
+
|
|
4
|
+
This module contains auto-generated code from ml_server.proto.
|
|
5
|
+
The files are named grammar_check_pb2* because that's what protoc generates
|
|
6
|
+
from the protobuf package name, but they implement the ml_server protocol.
|
|
7
|
+
|
|
8
|
+
Do not manually edit files in this module - regenerate using:
|
|
9
|
+
|
|
10
|
+
python generate_grpc.py
|
|
11
|
+
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from . import ml_server_pb2
|
|
15
|
+
from . import ml_server_pb2_grpc
|
|
16
|
+
|
|
17
|
+
__all__ = ['ml_server_pb2', 'ml_server_pb2_grpc']
|
|
18
|
+
|
|
19
|
+
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
|
3
|
+
# NO CHECKED-IN PROTOBUF GENCODE
|
|
4
|
+
# source: ml_server.proto
|
|
5
|
+
# Protobuf Python Version: 5.27.2
|
|
6
|
+
"""Generated protocol buffer code."""
|
|
7
|
+
from google.protobuf import descriptor as _descriptor
|
|
8
|
+
from google.protobuf import descriptor_pool as _descriptor_pool
|
|
9
|
+
from google.protobuf import runtime_version as _runtime_version
|
|
10
|
+
from google.protobuf import symbol_database as _symbol_database
|
|
11
|
+
from google.protobuf.internal import builder as _builder
|
|
12
|
+
_runtime_version.ValidateProtobufRuntimeVersion(
|
|
13
|
+
_runtime_version.Domain.PUBLIC,
|
|
14
|
+
5,
|
|
15
|
+
27,
|
|
16
|
+
2,
|
|
17
|
+
'',
|
|
18
|
+
'ml_server.proto'
|
|
19
|
+
)
|
|
20
|
+
# @@protoc_insertion_point(imports)
|
|
21
|
+
|
|
22
|
+
_sym_db = _symbol_database.Default()
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0fml_server.proto\x12\x0clt_ml_server\"P\n\x0e\x41nalyzeRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\x30\n\x07options\x18\x02 \x01(\x0b\x32\x1f.lt_ml_server.ProcessingOptions\"\xd7\x02\n\x11ProcessingOptions\x12\x10\n\x08language\x18\x01 \x01(\t\x12\x0f\n\x07tempOff\x18\x02 \x01(\x08\x12\x34\n\x05level\x18\x03 \x01(\x0e\x32%.lt_ml_server.ProcessingOptions.Level\x12\x0f\n\x07premium\x18\x04 \x01(\x08\x12\x13\n\x0b\x65nabledOnly\x18\x05 \x01(\x08\x12\x14\n\x0c\x65nabledRules\x18\x06 \x03(\t\x12\x15\n\rdisabledRules\x18\x07 \x03(\t\"\x95\x01\n\x05Level\x12\t\n\x05picky\x10\x00\x12\x0c\n\x08\x61\x63\x61\x64\x65mic\x10\x01\x12\x0b\n\x07\x63larity\x10\x02\x12\x10\n\x0cprofessional\x10\x03\x12\x0c\n\x08\x63reative\x10\x04\x12\x0c\n\x08\x63ustomer\x10\x05\x12\n\n\x06jobapp\x10\x06\x12\r\n\tobjective\x10\x07\x12\x0b\n\x07\x65legant\x10\x08\x12\x10\n\x0c\x64\x65\x66\x61ultLevel\x10\t\"D\n\x0f\x41nalyzeResponse\x12\x31\n\tsentences\x18\x01 \x03(\x0b\x32\x1e.lt_ml_server.AnalyzedSentence\"u\n\x0eProcessRequest\x12\x31\n\tsentences\x18\x01 \x03(\x0b\x32\x1e.lt_ml_server.AnalyzedSentence\x12\x30\n\x07options\x18\x02 \x01(\x0b\x32\x1f.lt_ml_server.ProcessingOptions\"`\n\x0fProcessResponse\x12\'\n\nrawMatches\x18\x01 \x03(\x0b\x32\x13.lt_ml_server.Match\x12$\n\x07matches\x18\x02 \x03(\x0b\x32\x13.lt_ml_server.Match\"v\n\x14\x41nalyzedMatchRequest\x12\x31\n\tsentences\x18\x01 \x03(\x0b\x32\x1e.lt_ml_server.AnalyzedSentence\x12\x14\n\x0cinputLogging\x18\x02 \x01(\x08\x12\x15\n\rtextSessionID\x18\x03 \x03(\x03\"U\n\x10\x41nalyzedSentence\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\x33\n\x06tokens\x18\x02 \x03(\x0b\x32#.lt_ml_server.AnalyzedTokenReadings\"k\n\x15\x41nalyzedTokenReadings\x12-\n\x08readings\x18\x01 \x03(\x0b\x32\x1b.lt_ml_server.AnalyzedToken\x12\x11\n\tchunkTags\x18\x02 \x03(\t\x12\x10\n\x08startPos\x18\x03 \x01(\x05\"=\n\rAnalyzedToken\x12\r\n\x05token\x18\x01 \x01(\t\x12\x0e\n\x06posTag\x18\x02 
\x01(\t\x12\r\n\x05lemma\x18\x03 \x01(\t\"\x81\x01\n\x15PostProcessingRequest\x12\x11\n\tsentences\x18\x01 \x03(\t\x12(\n\x07matches\x18\x02 \x03(\x0b\x32\x17.lt_ml_server.MatchList\x12\x14\n\x0cinputLogging\x18\x03 \x01(\x08\x12\x15\n\rtextSessionID\x18\x04 \x03(\x03\"N\n\x0cMatchRequest\x12\x11\n\tsentences\x18\x01 \x03(\t\x12\x14\n\x0cinputLogging\x18\x02 \x01(\x08\x12\x15\n\rtextSessionID\x18\x03 \x03(\x03\"A\n\rMatchResponse\x12\x30\n\x0fsentenceMatches\x18\x01 \x03(\x0b\x32\x17.lt_ml_server.MatchList\"1\n\tMatchList\x12$\n\x07matches\x18\x01 \x03(\x0b\x32\x13.lt_ml_server.Match\"\xae\x03\n\x05Match\x12\x0e\n\x06offset\x18\x01 \x01(\r\x12\x0e\n\x06length\x18\x02 \x01(\r\x12\n\n\x02id\x18\x03 \x01(\t\x12\x0e\n\x06sub_id\x18\x04 \x01(\t\x12\x13\n\x0bsuggestions\x18\x05 \x03(\t\x12\x17\n\x0fruleDescription\x18\x06 \x01(\t\x12\x18\n\x10matchDescription\x18\x07 \x01(\t\x12\x1d\n\x15matchShortDescription\x18\x08 \x01(\t\x12\x0b\n\x03url\x18\t \x01(\t\x12\x41\n\x15suggestedReplacements\x18\n \x03(\x0b\x32\".lt_ml_server.SuggestedReplacement\x12\x13\n\x0b\x61utoCorrect\x18\x0b \x01(\x08\x12+\n\x04type\x18\x0c \x01(\x0e\x32\x1d.lt_ml_server.Match.MatchType\x12\x1b\n\x13\x63ontextForSureMatch\x18\r \x01(\x11\x12 \n\x04rule\x18\x0e \x01(\x0b\x32\x12.lt_ml_server.Rule\"1\n\tMatchType\x12\x0f\n\x0bUnknownWord\x10\x00\x12\x08\n\x04Hint\x10\x01\x12\t\n\x05Other\x10\x02\"\xa9\x02\n\x04Rule\x12\x12\n\nsourceFile\x18\x01 \x01(\t\x12\x11\n\tissueType\x18\x02 \x01(\t\x12\x0f\n\x07tempOff\x18\x03 \x01(\x08\x12,\n\x08\x63\x61tegory\x18\x04 \x01(\x0b\x32\x1a.lt_ml_server.RuleCategory\x12\x11\n\tisPremium\x18\x05 \x01(\x08\x12$\n\x04tags\x18\x06 
\x03(\x0e\x32\x16.lt_ml_server.Rule.Tag\"\x81\x01\n\x03Tag\x12\t\n\x05picky\x10\x00\x12\x0c\n\x08\x61\x63\x61\x64\x65mic\x10\x01\x12\x0b\n\x07\x63larity\x10\x02\x12\x10\n\x0cprofessional\x10\x03\x12\x0c\n\x08\x63reative\x10\x04\x12\x0c\n\x08\x63ustomer\x10\x05\x12\n\n\x06jobapp\x10\x06\x12\r\n\tobjective\x10\x07\x12\x0b\n\x07\x65legant\x10\x08\"(\n\x0cRuleCategory\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\"\xe2\x01\n\x14SuggestedReplacement\x12\x13\n\x0breplacement\x18\x01 \x01(\t\x12\x13\n\x0b\x64\x65scription\x18\x02 \x01(\t\x12\x0e\n\x06suffix\x18\x03 \x01(\t\x12\x12\n\nconfidence\x18\x04 \x01(\x02\x12?\n\x04type\x18\x05 \x01(\x0e\x32\x31.lt_ml_server.SuggestedReplacement.SuggestionType\";\n\x0eSuggestionType\x12\x0b\n\x07\x44\x65\x66\x61ult\x10\x00\x12\x0f\n\x0bTranslation\x10\x01\x12\x0b\n\x07\x43urated\x10\x02\x32\xa6\x01\n\x10ProcessingServer\x12H\n\x07\x41nalyze\x12\x1c.lt_ml_server.AnalyzeRequest\x1a\x1d.lt_ml_server.AnalyzeResponse\"\x00\x12H\n\x07Process\x12\x1c.lt_ml_server.ProcessRequest\x1a\x1d.lt_ml_server.ProcessResponse\"\x00\x32\xa2\x01\n\x08MLServer\x12\x42\n\x05Match\x12\x1a.lt_ml_server.MatchRequest\x1a\x1b.lt_ml_server.MatchResponse\"\x00\x12R\n\rMatchAnalyzed\x12\".lt_ml_server.AnalyzedMatchRequest\x1a\x1b.lt_ml_server.MatchResponse\"\x00\x32\x65\n\x14PostProcessingServer\x12M\n\x07Process\x12#.lt_ml_server.PostProcessingRequest\x1a\x1b.lt_ml_server.MatchResponse\"\x00\x42*\n\x19org.languagetool.rules.mlB\rMLServerProtob\x06proto3')
|
|
28
|
+
|
|
29
|
+
_globals = globals()
|
|
30
|
+
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
|
|
31
|
+
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'ml_server_pb2', _globals)
|
|
32
|
+
if not _descriptor._USE_C_DESCRIPTORS:
|
|
33
|
+
_globals['DESCRIPTOR']._loaded_options = None
|
|
34
|
+
_globals['DESCRIPTOR']._serialized_options = b'\n\031org.languagetool.rules.mlB\rMLServerProto'
|
|
35
|
+
_globals['_ANALYZEREQUEST']._serialized_start=33
|
|
36
|
+
_globals['_ANALYZEREQUEST']._serialized_end=113
|
|
37
|
+
_globals['_PROCESSINGOPTIONS']._serialized_start=116
|
|
38
|
+
_globals['_PROCESSINGOPTIONS']._serialized_end=459
|
|
39
|
+
_globals['_PROCESSINGOPTIONS_LEVEL']._serialized_start=310
|
|
40
|
+
_globals['_PROCESSINGOPTIONS_LEVEL']._serialized_end=459
|
|
41
|
+
_globals['_ANALYZERESPONSE']._serialized_start=461
|
|
42
|
+
_globals['_ANALYZERESPONSE']._serialized_end=529
|
|
43
|
+
_globals['_PROCESSREQUEST']._serialized_start=531
|
|
44
|
+
_globals['_PROCESSREQUEST']._serialized_end=648
|
|
45
|
+
_globals['_PROCESSRESPONSE']._serialized_start=650
|
|
46
|
+
_globals['_PROCESSRESPONSE']._serialized_end=746
|
|
47
|
+
_globals['_ANALYZEDMATCHREQUEST']._serialized_start=748
|
|
48
|
+
_globals['_ANALYZEDMATCHREQUEST']._serialized_end=866
|
|
49
|
+
_globals['_ANALYZEDSENTENCE']._serialized_start=868
|
|
50
|
+
_globals['_ANALYZEDSENTENCE']._serialized_end=953
|
|
51
|
+
_globals['_ANALYZEDTOKENREADINGS']._serialized_start=955
|
|
52
|
+
_globals['_ANALYZEDTOKENREADINGS']._serialized_end=1062
|
|
53
|
+
_globals['_ANALYZEDTOKEN']._serialized_start=1064
|
|
54
|
+
_globals['_ANALYZEDTOKEN']._serialized_end=1125
|
|
55
|
+
_globals['_POSTPROCESSINGREQUEST']._serialized_start=1128
|
|
56
|
+
_globals['_POSTPROCESSINGREQUEST']._serialized_end=1257
|
|
57
|
+
_globals['_MATCHREQUEST']._serialized_start=1259
|
|
58
|
+
_globals['_MATCHREQUEST']._serialized_end=1337
|
|
59
|
+
_globals['_MATCHRESPONSE']._serialized_start=1339
|
|
60
|
+
_globals['_MATCHRESPONSE']._serialized_end=1404
|
|
61
|
+
_globals['_MATCHLIST']._serialized_start=1406
|
|
62
|
+
_globals['_MATCHLIST']._serialized_end=1455
|
|
63
|
+
_globals['_MATCH']._serialized_start=1458
|
|
64
|
+
_globals['_MATCH']._serialized_end=1888
|
|
65
|
+
_globals['_MATCH_MATCHTYPE']._serialized_start=1839
|
|
66
|
+
_globals['_MATCH_MATCHTYPE']._serialized_end=1888
|
|
67
|
+
_globals['_RULE']._serialized_start=1891
|
|
68
|
+
_globals['_RULE']._serialized_end=2188
|
|
69
|
+
_globals['_RULE_TAG']._serialized_start=2059
|
|
70
|
+
_globals['_RULE_TAG']._serialized_end=2188
|
|
71
|
+
_globals['_RULECATEGORY']._serialized_start=2190
|
|
72
|
+
_globals['_RULECATEGORY']._serialized_end=2230
|
|
73
|
+
_globals['_SUGGESTEDREPLACEMENT']._serialized_start=2233
|
|
74
|
+
_globals['_SUGGESTEDREPLACEMENT']._serialized_end=2459
|
|
75
|
+
_globals['_SUGGESTEDREPLACEMENT_SUGGESTIONTYPE']._serialized_start=2400
|
|
76
|
+
_globals['_SUGGESTEDREPLACEMENT_SUGGESTIONTYPE']._serialized_end=2459
|
|
77
|
+
_globals['_PROCESSINGSERVER']._serialized_start=2462
|
|
78
|
+
_globals['_PROCESSINGSERVER']._serialized_end=2628
|
|
79
|
+
_globals['_MLSERVER']._serialized_start=2631
|
|
80
|
+
_globals['_MLSERVER']._serialized_end=2793
|
|
81
|
+
_globals['_POSTPROCESSINGSERVER']._serialized_start=2795
|
|
82
|
+
_globals['_POSTPROCESSINGSERVER']._serialized_end=2896
|
|
83
|
+
# @@protoc_insertion_point(module_scope)
|