centering-lgram 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- centering_lgram-1.0.0/LICENSE +21 -0
- centering_lgram-1.0.0/MANIFEST.in +30 -0
- centering_lgram-1.0.0/PKG-INFO +260 -0
- centering_lgram-1.0.0/README.md +206 -0
- centering_lgram-1.0.0/__init__.py +0 -0
- centering_lgram-1.0.0/centering_lgram.egg-info/PKG-INFO +260 -0
- centering_lgram-1.0.0/centering_lgram.egg-info/SOURCES.txt +93 -0
- centering_lgram-1.0.0/centering_lgram.egg-info/dependency_links.txt +1 -0
- centering_lgram-1.0.0/centering_lgram.egg-info/entry_points.txt +3 -0
- centering_lgram-1.0.0/centering_lgram.egg-info/requires.txt +24 -0
- centering_lgram-1.0.0/centering_lgram.egg-info/top_level.txt +1 -0
- centering_lgram-1.0.0/daily_life_dataset.txt +10000 -0
- centering_lgram-1.0.0/formatted_text_safe.txt +92 -0
- centering_lgram-1.0.0/lgram/__init__.py +271 -0
- centering_lgram-1.0.0/lgram/cli.py +224 -0
- centering_lgram-1.0.0/lgram/core.py +51 -0
- centering_lgram-1.0.0/lgram/models.py +7 -0
- centering_lgram-1.0.0/lgram/utils.py +165 -0
- centering_lgram-1.0.0/logs/daily_log_2025-04-25.txt +17 -0
- centering_lgram-1.0.0/logs/daily_log_2025-04-26.txt +3486 -0
- centering_lgram-1.0.0/logs/daily_log_2025-04-28.txt +10420 -0
- centering_lgram-1.0.0/logs/daily_log_2025-04-29.txt +859 -0
- centering_lgram-1.0.0/logs/daily_log_2025-04-30.txt +9282 -0
- centering_lgram-1.0.0/logs/daily_log_2025-05-01.txt +5335 -0
- centering_lgram-1.0.0/logs/daily_log_2025-05-02.txt +5524 -0
- centering_lgram-1.0.0/logs/daily_log_2025-05-03.txt +190 -0
- centering_lgram-1.0.0/logs/daily_log_2025-05-04.txt +1798 -0
- centering_lgram-1.0.0/logs/daily_log_2025-05-05.txt +369 -0
- centering_lgram-1.0.0/logs/daily_log_2025-05-07.txt +2387 -0
- centering_lgram-1.0.0/logs/daily_log_2025-05-08.txt +1370 -0
- centering_lgram-1.0.0/logs/daily_log_2025-05-09.txt +2060 -0
- centering_lgram-1.0.0/logs/daily_log_2025-05-10.txt +1880 -0
- centering_lgram-1.0.0/logs/daily_log_2025-05-11.txt +4401 -0
- centering_lgram-1.0.0/logs/daily_log_2025-05-12.txt +2663 -0
- centering_lgram-1.0.0/logs/daily_log_2025-05-13.txt +2819 -0
- centering_lgram-1.0.0/logs/daily_log_2025-05-14.txt +3202 -0
- centering_lgram-1.0.0/logs/daily_log_2025-05-15.txt +1625 -0
- centering_lgram-1.0.0/logs/daily_log_2025-05-16.txt +610 -0
- centering_lgram-1.0.0/logs/daily_log_2025-05-17.txt +60 -0
- centering_lgram-1.0.0/logs/daily_log_2025-05-18.txt +36 -0
- centering_lgram-1.0.0/logs/daily_log_2025-05-19.txt +25 -0
- centering_lgram-1.0.0/logs/daily_log_2025-05-20.txt +39 -0
- centering_lgram-1.0.0/logs/daily_log_2025-05-22.txt +54 -0
- centering_lgram-1.0.0/logs/daily_log_2025-05-23.txt +48 -0
- centering_lgram-1.0.0/logs/daily_log_2025-05-24.txt +15 -0
- centering_lgram-1.0.0/logs/daily_log_2025-05-30.txt +20 -0
- centering_lgram-1.0.0/logs/daily_log_2025-05-31.txt +26 -0
- centering_lgram-1.0.0/logs/daily_log_2025-07-04.txt +51 -0
- centering_lgram-1.0.0/logs/daily_log_2025-08-14.txt +6 -0
- centering_lgram-1.0.0/logs/daily_log_2025-08-16.txt +12 -0
- centering_lgram-1.0.0/logs/daily_log_2025-08-17.txt +57 -0
- centering_lgram-1.0.0/logs/daily_log_2025-08-19.txt +24 -0
- centering_lgram-1.0.0/logs/daily_log_2025-08-20.txt +18 -0
- centering_lgram-1.0.0/models/__init__.py +3 -0
- centering_lgram-1.0.0/models/analyze_transitions.py +39 -0
- centering_lgram-1.0.0/models/auto_correction_finder.py +106 -0
- centering_lgram-1.0.0/models/centering_model.py +59 -0
- centering_lgram-1.0.0/models/centering_theory.py +275 -0
- centering_lgram-1.0.0/models/chunk.py +942 -0
- centering_lgram-1.0.0/models/condordance_model.py +138 -0
- centering_lgram-1.0.0/models/dataset_generator.py +45 -0
- centering_lgram-1.0.0/models/dynamicngramparaphraser.py +399 -0
- centering_lgram-1.0.0/models/get_gender.py +108 -0
- centering_lgram-1.0.0/models/logs/daily_log_2025-07-04.txt +90 -0
- centering_lgram-1.0.0/models/logs/daily_log_2025-08-16.txt +6 -0
- centering_lgram-1.0.0/models/merge_lines.py +30 -0
- centering_lgram-1.0.0/models/model_summary.py +94 -0
- centering_lgram-1.0.0/models/paraphraser.py +98 -0
- centering_lgram-1.0.0/models/performance_optimization_plan.md +161 -0
- centering_lgram-1.0.0/models/preprocessor.py +55 -0
- centering_lgram-1.0.0/models/simple_language_model.py +940 -0
- centering_lgram-1.0.0/models/summary_collocations.py +57 -0
- centering_lgram-1.0.0/models/transition_analyzer.py +128 -0
- centering_lgram-1.0.0/models/utils.py +58 -0
- centering_lgram-1.0.0/models/wrapper.py +39 -0
- centering_lgram-1.0.0/ngrams/bigram_model.pkl +0 -0
- centering_lgram-1.0.0/ngrams/collocations.pkl +0 -0
- centering_lgram-1.0.0/ngrams/corrections.json +1312 -0
- centering_lgram-1.0.0/ngrams/fivegram_model.pkl +0 -0
- centering_lgram-1.0.0/ngrams/fourgram_model.pkl +0 -0
- centering_lgram-1.0.0/ngrams/language_model.pkl +0 -0
- centering_lgram-1.0.0/ngrams/more.txt +10000 -0
- centering_lgram-1.0.0/ngrams/sixgram_model.pkl +0 -0
- centering_lgram-1.0.0/ngrams/text_data.txt +37579 -0
- centering_lgram-1.0.0/ngrams/thematic_wiki.txt +1540 -0
- centering_lgram-1.0.0/ngrams/trigram_model.pkl +0 -0
- centering_lgram-1.0.0/ngrams/word_embedding_model.pt +0 -0
- centering_lgram-1.0.0/output.txt +3 -0
- centering_lgram-1.0.0/pyproject.toml +104 -0
- centering_lgram-1.0.0/requirements.txt +0 -0
- centering_lgram-1.0.0/setup.cfg +4 -0
- centering_lgram-1.0.0/setup.py +95 -0
- centering_lgram-1.0.0/test_t5.py +0 -0
- centering_lgram-1.0.0/tests/__init__.py +1 -0
- centering_lgram-1.0.0/tests/test_lgram.py +59 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 iatagun
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Include important files in the distribution
|
|
2
|
+
include README.md
|
|
3
|
+
include LICENSE
|
|
4
|
+
include requirements.txt
|
|
5
|
+
include pyproject.toml
|
|
6
|
+
|
|
7
|
+
# Include all Python files
|
|
8
|
+
recursive-include lgram *.py
|
|
9
|
+
|
|
10
|
+
# Include model and data files
|
|
11
|
+
recursive-include models *.py
|
|
12
|
+
recursive-include ngrams *.pkl *.json *.txt
|
|
13
|
+
recursive-include logs *.txt
|
|
14
|
+
|
|
15
|
+
# Exclude unnecessary files
|
|
16
|
+
exclude *.pyc
|
|
17
|
+
exclude .DS_Store
|
|
18
|
+
exclude .gitignore
|
|
19
|
+
recursive-exclude * __pycache__
|
|
20
|
+
recursive-exclude * *.py[co]
|
|
21
|
+
recursive-exclude * .DS_Store
|
|
22
|
+
|
|
23
|
+
# Include documentation
|
|
24
|
+
recursive-include docs *.md *.rst *.txt
|
|
25
|
+
|
|
26
|
+
# Include tests if present
|
|
27
|
+
recursive-include tests *.py
|
|
28
|
+
|
|
29
|
+
# Include examples if present
|
|
30
|
+
recursive-include examples *.py *.txt *.md
|
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: centering-lgram
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Advanced Language Model with Centering Theory for Coherent Text Generation
|
|
5
|
+
Home-page: https://github.com/iatagun/Lgram
|
|
6
|
+
Author: İlker Atagün
|
|
7
|
+
Author-email: İlker Atagün <ilker.atagun@gmail.com>
|
|
8
|
+
Project-URL: Homepage, https://github.com/iatagun/Lgram
|
|
9
|
+
Project-URL: Bug Reports, https://github.com/iatagun/Lgram/issues
|
|
10
|
+
Project-URL: Source, https://github.com/iatagun/Lgram
|
|
11
|
+
Project-URL: Documentation, https://github.com/iatagun/Lgram/blob/main/README.md
|
|
12
|
+
Keywords: nlp,natural language processing,text generation,centering theory,coherence,language model,n-gram,discourse analysis,computational linguistics
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Intended Audience :: Science/Research
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
17
|
+
Classifier: Topic :: Text Processing :: Linguistic
|
|
18
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
19
|
+
Classifier: Programming Language :: Python :: 3
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
25
|
+
Classifier: Operating System :: OS Independent
|
|
26
|
+
Requires-Python: >=3.8
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
License-File: LICENSE
|
|
29
|
+
Requires-Dist: torch>=1.9.0
|
|
30
|
+
Requires-Dist: transformers>=4.20.0
|
|
31
|
+
Requires-Dist: spacy>=3.4.0
|
|
32
|
+
Requires-Dist: scikit-learn>=1.0.0
|
|
33
|
+
Requires-Dist: scipy>=1.7.0
|
|
34
|
+
Requires-Dist: numpy>=1.21.0
|
|
35
|
+
Requires-Dist: tqdm>=4.62.0
|
|
36
|
+
Provides-Extra: django
|
|
37
|
+
Requires-Dist: django>=3.2.0; extra == "django"
|
|
38
|
+
Provides-Extra: full
|
|
39
|
+
Requires-Dist: django>=3.2.0; extra == "full"
|
|
40
|
+
Requires-Dist: jupyter>=1.0.0; extra == "full"
|
|
41
|
+
Requires-Dist: matplotlib>=3.5.0; extra == "full"
|
|
42
|
+
Requires-Dist: plotly>=5.0.0; extra == "full"
|
|
43
|
+
Provides-Extra: dev
|
|
44
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
45
|
+
Requires-Dist: black>=22.0.0; extra == "dev"
|
|
46
|
+
Requires-Dist: flake8>=4.0.0; extra == "dev"
|
|
47
|
+
Requires-Dist: mypy>=0.950; extra == "dev"
|
|
48
|
+
Requires-Dist: build>=0.8.0; extra == "dev"
|
|
49
|
+
Requires-Dist: twine>=4.0.0; extra == "dev"
|
|
50
|
+
Dynamic: author
|
|
51
|
+
Dynamic: home-page
|
|
52
|
+
Dynamic: license-file
|
|
53
|
+
Dynamic: requires-python
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# 🧠 Centering-Lgram: Advanced Language Model with Centering Theory
|
|
57
|
+
|
|
58
|
+
[![PyPI version](https://badge.fury.io/py/centering-lgram.svg)](https://badge.fury.io/py/centering-lgram)
|
|
59
|
+
[![Python versions](https://img.shields.io/pypi/pyversions/centering-lgram.svg)](https://pypi.org/project/centering-lgram/)
|
|
60
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
|
61
|
+
[![Downloads](https://pepy.tech/badge/centering-lgram)](https://pepy.tech/project/centering-lgram)
|
|
62
|
+
|
|
63
|
+
A sophisticated natural language processing library that combines **N-gram language models** with **Centering Theory** to generate coherent and contextually appropriate text. Lgram provides state-of-the-art discourse coherence analysis and text generation capabilities.
|
|
64
|
+
|
|
65
|
+
## ✨ Key Features
|
|
66
|
+
|
|
67
|
+
- **🎯 Coherent Text Generation**: Advanced n-gram models (2-gram to 6-gram) with centering theory
|
|
68
|
+
- **🧠 Discourse Analysis**: Implementation of centering theory for coherence evaluation
|
|
69
|
+
- **🔧 Grammar Correction**: T5 transformer-based grammar and style correction
|
|
70
|
+
- **🌐 Semantic Analysis**: spaCy-powered semantic relationship detection
|
|
71
|
+
- **📊 Collocation Analysis**: Statistical collocation and thematic consistency
|
|
72
|
+
- **⚡ Django Ready**: Production-ready integration with the Django framework
|
|
73
|
+
- **🎨 CLI Interface**: Easy-to-use command line tools
|
|
74
|
+
- **📈 Progress Tracking**: Visual progress bars and detailed logging
|
|
75
|
+
|
|
76
|
+
## 🚀 Quick Start
|
|
77
|
+
|
|
78
|
+
### Installation
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
# Install from PyPI
|
|
82
|
+
pip install centering-lgram
|
|
83
|
+
|
|
84
|
+
# Install with all optional dependencies
|
|
85
|
+
pip install centering-lgram[full]
|
|
86
|
+
|
|
87
|
+
# Install for Django projects
|
|
88
|
+
pip install centering-lgram[django]
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### Basic Usage
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
from lgram import create_language_model
|
|
95
|
+
|
|
96
|
+
# Create or load a language model
|
|
97
|
+
model = create_language_model()
|
|
98
|
+
|
|
99
|
+
# Generate coherent text
|
|
100
|
+
text = model.generate_text(
|
|
101
|
+
num_sentences=3,
|
|
102
|
+
input_words=["The", "weather"],
|
|
103
|
+
length=12,
|
|
104
|
+
use_progress_bar=True
|
|
105
|
+
)
|
|
106
|
+
print(text)
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### Advanced Usage with Centering Theory
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
from lgram import EnhancedLanguageModel
|
|
113
|
+
|
|
114
|
+
# Initialize with custom settings
|
|
115
|
+
model = EnhancedLanguageModel(n=3) # Use 3-gram model
|
|
116
|
+
|
|
117
|
+
# Generate with centering theory for better coherence
|
|
118
|
+
coherent_text = model.generate_text_with_centering(
|
|
119
|
+
num_sentences=5,
|
|
120
|
+
input_words=["She", "founded"],
|
|
121
|
+
length=15
|
|
122
|
+
)
|
|
123
|
+
|
|
124
|
+
# Apply grammar correction
|
|
125
|
+
corrected_text = model.correct_grammar_t5(coherent_text)
|
|
126
|
+
print(corrected_text)
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
### Command Line Interface
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
# Generate text from command line
|
|
133
|
+
centering-lgram generate --input "The weather today" --sentences 3 --correct
|
|
134
|
+
|
|
135
|
+
# Use centering theory
|
|
136
|
+
centering-lgram generate --input "She founded" --sentences 5 --centering --progress
|
|
137
|
+
|
|
138
|
+
# Show system information
|
|
139
|
+
centering-lgram info
|
|
140
|
+
|
|
141
|
+
# Train a new model
|
|
142
|
+
centering-lgram train --text-file data.txt --model-file my_model.pkl
|
|
143
|
+
|
|
144
|
+
# Backward compatibility - old command still works
|
|
145
|
+
lgram generate --input "Hello world" --sentences 3
|
|
146
|
+
```
|
|
147
|
+
- Output: Transition scores, types, and detailed pairwise information
|
|
148
|
+
|
|
149
|
+
### 2. `TransitionAnalyzer`
|
|
150
|
+
Analyzes sentence pairs to extract:
|
|
151
|
+
- **Noun phrases** (`noun_chunks`)
|
|
152
|
+
- **Anaphoric relations**
|
|
153
|
+
- **Transition types**
|
|
154
|
+
|
|
155
|
+
This analysis supports both statistical and linguistic evaluation.
|
|
156
|
+
|
|
157
|
+
### 3. `EnhancedLanguageModel`
|
|
158
|
+
Generates context-aware, fluent sentences using a **Kneser-Ney smoothed n-gram model** enhanced with POS tagging.
|
|
159
|
+
|
|
160
|
+
#### Key Features:
|
|
161
|
+
- Generation using 2- to 6-gram models
|
|
162
|
+
- Syntactic analysis and centering using `SpaCy`
|
|
163
|
+
- Linguistic center tracking via `get_center_from_sentence`
|
|
164
|
+
- Contextual word selection via `choose_word_with_context`
|
|
165
|
+
- Completeness check via `is_complete_thought`
|
|
166
|
+
- Theme consistency via `post_process_sentences`
|
|
167
|
+
|
|
168
|
+
### 4. `dynamicngramparaphraser.py`
|
|
169
|
+
Performs **contextual paraphrasing** based on n-grams. Selects the **best alternative match** for each word depending on its position and syntactic role.
|
|
170
|
+
|
|
171
|
+
- Supports **dependency-based reordering** (`reorder_sentence`)
|
|
172
|
+
- Combines vector similarity and frequency with `select_best_match`
|
|
173
|
+
|
|
174
|
+
### 5. `analyze_transitions.py`
|
|
175
|
+
Invokes the `CenteringModel` to analyze all sentence transitions in a text and returns the results as a `DataFrame`, including:
|
|
176
|
+
- `current_sentence`
|
|
177
|
+
- `next_sentence`
|
|
178
|
+
- `transition_type`
|
|
179
|
+
- `score`
|
|
180
|
+
- `total_score`
|
|
181
|
+
|
|
182
|
+
## 🗂 File Structure
|
|
183
|
+
|
|
184
|
+
.
|
|
185
|
+
├── analyze_transitions.py
|
|
186
|
+
├── centering_model.py
|
|
187
|
+
├── chunk.py
|
|
188
|
+
├── dynamicngramparaphraser.py
|
|
189
|
+
├── simple_language_model.py
|
|
190
|
+
├── get_gender.py
|
|
191
|
+
├── transition_analyzer.py
|
|
192
|
+
├── corrections.json
|
|
193
|
+
├── ngrams/
|
|
194
|
+
│ ├── bigram_model.pkl
|
|
195
|
+
│ ├── trigram_model.pkl
|
|
196
|
+
│ ├── fourgram_model.pkl
|
|
197
|
+
│ ├── fivegram_model.pkl
|
|
198
|
+
│ ├── sixgram_model.pkl
|
|
199
|
+
│ └── text_data.txt
|
|
200
|
+
|
|
201
|
+
## 🚀 Usage Example
|
|
202
|
+
|
|
203
|
+
### Transition Analysis
|
|
204
|
+
```python
|
|
205
|
+
from analyze_transitions import analyze_transitions
|
|
206
|
+
|
|
207
|
+
text = "Least of all do they thus dispose of the murdered. Guardsman take small farmer well who loathe every precaution the officer."
|
|
208
|
+
df = analyze_transitions(text)
|
|
209
|
+
print(df)
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
### Sentence Generation
|
|
213
|
+
```python
|
|
214
|
+
from simple_language_model import EnhancedLanguageModel
|
|
215
|
+
|
|
216
|
+
model = EnhancedLanguageModel("Some training text.")
|
|
217
|
+
sentence = model.generate_sentence(start_words=["The", "man"], length=12)
|
|
218
|
+
print("Generated:", sentence)
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
### Coherence Report
|
|
222
|
+
```python
|
|
223
|
+
sentences = ["The man left.", "She stayed at home."]
|
|
224
|
+
cleaned_sentences, report = model.post_process_sentences(sentences)
|
|
225
|
+
print(report)
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
## 🛠 Requirements
|
|
229
|
+
|
|
230
|
+
- Python 3.8+
|
|
231
|
+
- `spacy`
|
|
232
|
+
- `numpy`
|
|
233
|
+
- `scikit-learn`
|
|
234
|
+
- `tqdm`
|
|
235
|
+
- `pandas`
|
|
236
|
+
|
|
237
|
+
### SpaCy Model
|
|
238
|
+
```bash
|
|
239
|
+
python -m spacy download en_core_web_lg
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
## 🎯 Purpose
|
|
243
|
+
|
|
244
|
+
This project provides a powerful infrastructure for researchers, developers, and linguistics enthusiasts working in **textual coherence**, **discourse transition**, and **automated sentence generation**. Whether you're generating text, analyzing transitions, or evaluating textual consistency — this is your **linguistic lab**.
|
|
245
|
+
|
|
246
|
+
## Outputs
|
|
247
|
+
|
|
248
|
+
The victim must get to go to the woods and learn the true meaning of the word, "she continued. " According to Gallup, in 2002, they would not say willingly. Your opinion is needed. At some moment he might not think of himself as a genius, but as the mere fact of being alive. He chose art.
|
|
249
|
+
|
|
250
|
+
The murder of Agamemnon would send shivers down your backbone. It is no use trying to suppress that side of myself. In her dead mind there is nothing which appears to her as being outside, and what is outside is what He has left behind. There is evidence to support the view that he has seen such a mention as an occasional burst of electricity and, I am sure, no trace of it. His plan would have been to attack the house and burn down the garden with some kind of fire. While her husband was away, she had made an excuse for being late. The size of these ships is unknown.
|
|
251
|
+
|
|
252
|
+
The murderer will say that this way if indeed the former is the case. In spring of 1992, I would have noticed. The gun swung and he asked the woman what she wanted, but she did not say anything. With the information that we have, we are able to attach more importance to what we do not need. To be near the rest of the world in this case is a matter of great importance.
|
|
253
|
+
|
|
254
|
+
The crime was committed by an old man. Indeed, so bad is the weather that we sometimes talk about the reason why he was in Berlin and the city itself. The reason is that he is a rich man. Grass has not grown in this day and age, so it is not suitable for making friends or building communities. I find myself in a bar and ask for a drink when I see a new world around me.
|
|
255
|
+
|
|
256
|
+
The time has come for you to do something in return and to observe what is happening in the present as closely as possible to one possibility. Mr. Phillips's protest was that softening the blow by a gentle breeze and creating a myriad of other sounds was wrong for the foot to be moved. He just pointed out that the world around him had gone wrong and that the course of events in his country would be too difficult for him to change. Of course, Miss Diana told me that she found it very pleasant to hear that he would return to private life. So say much as well as a bird would be a woman's best friend.
|
|
257
|
+
|
|
258
|
+
The victims of the fire could not have been more remarkable and sad to see. The victim wants to say that he has a young wife who loves him. It seems to him that his life has been entirely smothered by his work. He is on the verge of death and reaches for the moon.
|
|
259
|
+
|
|
260
|
+
The murder of Lady Godfrey does not make the heart race as much as anybody else. The murderer is the light sleeper and nothing less than Mr. Bingley will do. It is out of the question whether his own vanity has done much good for him. Is it rather subtler to say that some things are better than others? I like it pretty well and am willing to give it a try.
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
|
|
2
|
+
# 🧠 Centering-Lgram: Advanced Language Model with Centering Theory
|
|
3
|
+
|
|
4
|
+
[![PyPI version](https://badge.fury.io/py/centering-lgram.svg)](https://badge.fury.io/py/centering-lgram)
|
|
5
|
+
[![Python versions](https://img.shields.io/pypi/pyversions/centering-lgram.svg)](https://pypi.org/project/centering-lgram/)
|
|
6
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
|
7
|
+
[![Downloads](https://pepy.tech/badge/centering-lgram)](https://pepy.tech/project/centering-lgram)
|
|
8
|
+
|
|
9
|
+
A sophisticated natural language processing library that combines **N-gram language models** with **Centering Theory** to generate coherent and contextually appropriate text. Lgram provides state-of-the-art discourse coherence analysis and text generation capabilities.
|
|
10
|
+
|
|
11
|
+
## ✨ Key Features
|
|
12
|
+
|
|
13
|
+
- **🎯 Coherent Text Generation**: Advanced n-gram models (2-gram to 6-gram) with centering theory
|
|
14
|
+
- **🧠 Discourse Analysis**: Implementation of centering theory for coherence evaluation
|
|
15
|
+
- **🔧 Grammar Correction**: T5 transformer-based grammar and style correction
|
|
16
|
+
- **🌐 Semantic Analysis**: spaCy-powered semantic relationship detection
|
|
17
|
+
- **📊 Collocation Analysis**: Statistical collocation and thematic consistency
|
|
18
|
+
- **⚡ Django Ready**: Production-ready integration with the Django framework
|
|
19
|
+
- **🎨 CLI Interface**: Easy-to-use command line tools
|
|
20
|
+
- **📈 Progress Tracking**: Visual progress bars and detailed logging
|
|
21
|
+
|
|
22
|
+
## 🚀 Quick Start
|
|
23
|
+
|
|
24
|
+
### Installation
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
# Install from PyPI
|
|
28
|
+
pip install centering-lgram
|
|
29
|
+
|
|
30
|
+
# Install with all optional dependencies
|
|
31
|
+
pip install centering-lgram[full]
|
|
32
|
+
|
|
33
|
+
# Install for Django projects
|
|
34
|
+
pip install centering-lgram[django]
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
### Basic Usage
|
|
38
|
+
|
|
39
|
+
```python
|
|
40
|
+
from lgram import create_language_model
|
|
41
|
+
|
|
42
|
+
# Create or load a language model
|
|
43
|
+
model = create_language_model()
|
|
44
|
+
|
|
45
|
+
# Generate coherent text
|
|
46
|
+
text = model.generate_text(
|
|
47
|
+
num_sentences=3,
|
|
48
|
+
input_words=["The", "weather"],
|
|
49
|
+
length=12,
|
|
50
|
+
use_progress_bar=True
|
|
51
|
+
)
|
|
52
|
+
print(text)
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### Advanced Usage with Centering Theory
|
|
56
|
+
|
|
57
|
+
```python
|
|
58
|
+
from lgram import EnhancedLanguageModel
|
|
59
|
+
|
|
60
|
+
# Initialize with custom settings
|
|
61
|
+
model = EnhancedLanguageModel(n=3) # Use 3-gram model
|
|
62
|
+
|
|
63
|
+
# Generate with centering theory for better coherence
|
|
64
|
+
coherent_text = model.generate_text_with_centering(
|
|
65
|
+
num_sentences=5,
|
|
66
|
+
input_words=["She", "founded"],
|
|
67
|
+
length=15
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
# Apply grammar correction
|
|
71
|
+
corrected_text = model.correct_grammar_t5(coherent_text)
|
|
72
|
+
print(corrected_text)
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### Command Line Interface
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
# Generate text from command line
|
|
79
|
+
centering-lgram generate --input "The weather today" --sentences 3 --correct
|
|
80
|
+
|
|
81
|
+
# Use centering theory
|
|
82
|
+
centering-lgram generate --input "She founded" --sentences 5 --centering --progress
|
|
83
|
+
|
|
84
|
+
# Show system information
|
|
85
|
+
centering-lgram info
|
|
86
|
+
|
|
87
|
+
# Train a new model
|
|
88
|
+
centering-lgram train --text-file data.txt --model-file my_model.pkl
|
|
89
|
+
|
|
90
|
+
# Backward compatibility - old command still works
|
|
91
|
+
lgram generate --input "Hello world" --sentences 3
|
|
92
|
+
```
|
|
93
|
+
- Output: Transition scores, types, and detailed pairwise information
|
|
94
|
+
|
|
95
|
+
### 2. `TransitionAnalyzer`
|
|
96
|
+
Analyzes sentence pairs to extract:
|
|
97
|
+
- **Noun phrases** (`noun_chunks`)
|
|
98
|
+
- **Anaphoric relations**
|
|
99
|
+
- **Transition types**
|
|
100
|
+
|
|
101
|
+
This analysis supports both statistical and linguistic evaluation.
|
|
102
|
+
|
|
103
|
+
### 3. `EnhancedLanguageModel`
|
|
104
|
+
Generates context-aware, fluent sentences using a **Kneser-Ney smoothed n-gram model** enhanced with POS tagging.
|
|
105
|
+
|
|
106
|
+
#### Key Features:
|
|
107
|
+
- Generation using 2- to 6-gram models
|
|
108
|
+
- Syntactic analysis and centering using `SpaCy`
|
|
109
|
+
- Linguistic center tracking via `get_center_from_sentence`
|
|
110
|
+
- Contextual word selection via `choose_word_with_context`
|
|
111
|
+
- Completeness check via `is_complete_thought`
|
|
112
|
+
- Theme consistency via `post_process_sentences`
|
|
113
|
+
|
|
114
|
+
### 4. `dynamicngramparaphraser.py`
|
|
115
|
+
Performs **contextual paraphrasing** based on n-grams. Selects the **best alternative match** for each word depending on its position and syntactic role.
|
|
116
|
+
|
|
117
|
+
- Supports **dependency-based reordering** (`reorder_sentence`)
|
|
118
|
+
- Combines vector similarity and frequency with `select_best_match`
|
|
119
|
+
|
|
120
|
+
### 5. `analyze_transitions.py`
|
|
121
|
+
Invokes the `CenteringModel` to analyze all sentence transitions in a text and returns the results as a `DataFrame`, including:
|
|
122
|
+
- `current_sentence`
|
|
123
|
+
- `next_sentence`
|
|
124
|
+
- `transition_type`
|
|
125
|
+
- `score`
|
|
126
|
+
- `total_score`
|
|
127
|
+
|
|
128
|
+
## 🗂 File Structure
|
|
129
|
+
|
|
130
|
+
.
|
|
131
|
+
├── analyze_transitions.py
|
|
132
|
+
├── centering_model.py
|
|
133
|
+
├── chunk.py
|
|
134
|
+
├── dynamicngramparaphraser.py
|
|
135
|
+
├── simple_language_model.py
|
|
136
|
+
├── get_gender.py
|
|
137
|
+
├── transition_analyzer.py
|
|
138
|
+
├── corrections.json
|
|
139
|
+
├── ngrams/
|
|
140
|
+
│ ├── bigram_model.pkl
|
|
141
|
+
│ ├── trigram_model.pkl
|
|
142
|
+
│ ├── fourgram_model.pkl
|
|
143
|
+
│ ├── fivegram_model.pkl
|
|
144
|
+
│ ├── sixgram_model.pkl
|
|
145
|
+
│ └── text_data.txt
|
|
146
|
+
|
|
147
|
+
## 🚀 Usage Example
|
|
148
|
+
|
|
149
|
+
### Transition Analysis
|
|
150
|
+
```python
|
|
151
|
+
from analyze_transitions import analyze_transitions
|
|
152
|
+
|
|
153
|
+
text = "Least of all do they thus dispose of the murdered. Guardsman take small farmer well who loathe every precaution the officer."
|
|
154
|
+
df = analyze_transitions(text)
|
|
155
|
+
print(df)
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
### Sentence Generation
|
|
159
|
+
```python
|
|
160
|
+
from simple_language_model import EnhancedLanguageModel
|
|
161
|
+
|
|
162
|
+
model = EnhancedLanguageModel("Some training text.")
|
|
163
|
+
sentence = model.generate_sentence(start_words=["The", "man"], length=12)
|
|
164
|
+
print("Generated:", sentence)
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
### Coherence Report
|
|
168
|
+
```python
|
|
169
|
+
sentences = ["The man left.", "She stayed at home."]
|
|
170
|
+
cleaned_sentences, report = model.post_process_sentences(sentences)
|
|
171
|
+
print(report)
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
## 🛠 Requirements
|
|
175
|
+
|
|
176
|
+
- Python 3.8+
|
|
177
|
+
- `spacy`
|
|
178
|
+
- `numpy`
|
|
179
|
+
- `scikit-learn`
|
|
180
|
+
- `tqdm`
|
|
181
|
+
- `pandas`
|
|
182
|
+
|
|
183
|
+
### SpaCy Model
|
|
184
|
+
```bash
|
|
185
|
+
python -m spacy download en_core_web_lg
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
## 🎯 Purpose
|
|
189
|
+
|
|
190
|
+
This project provides a powerful infrastructure for researchers, developers, and linguistics enthusiasts working in **textual coherence**, **discourse transition**, and **automated sentence generation**. Whether you're generating text, analyzing transitions, or evaluating textual consistency — this is your **linguistic lab**.
|
|
191
|
+
|
|
192
|
+
## Outputs
|
|
193
|
+
|
|
194
|
+
The victim must get to go to the woods and learn the true meaning of the word, "she continued. " According to Gallup, in 2002, they would not say willingly. Your opinion is needed. At some moment he might not think of himself as a genius, but as the mere fact of being alive. He chose art.
|
|
195
|
+
|
|
196
|
+
The murder of Agamemnon would send shivers down your backbone. It is no use trying to suppress that side of myself. In her dead mind there is nothing which appears to her as being outside, and what is outside is what He has left behind. There is evidence to support the view that he has seen such a mention as an occasional burst of electricity and, I am sure, no trace of it. His plan would have been to attack the house and burn down the garden with some kind of fire. While her husband was away, she had made an excuse for being late. The size of these ships is unknown.
|
|
197
|
+
|
|
198
|
+
The murderer will say that this way if indeed the former is the case. In spring of 1992, I would have noticed. The gun swung and he asked the woman what she wanted, but she did not say anything. With the information that we have, we are able to attach more importance to what we do not need. To be near the rest of the world in this case is a matter of great importance.
|
|
199
|
+
|
|
200
|
+
The crime was committed by an old man. Indeed, so bad is the weather that we sometimes talk about the reason why he was in Berlin and the city itself. The reason is that he is a rich man. Grass has not grown in this day and age, so it is not suitable for making friends or building communities. I find myself in a bar and ask for a drink when I see a new world around me.
|
|
201
|
+
|
|
202
|
+
The time has come for you to do something in return and to observe what is happening in the present as closely as possible to one possibility. Mr. Phillips's protest was that softening the blow by a gentle breeze and creating a myriad of other sounds was wrong for the foot to be moved. He just pointed out that the world around him had gone wrong and that the course of events in his country would be too difficult for him to change. Of course, Miss Diana told me that she found it very pleasant to hear that he would return to private life. So say much as well as a bird would be a woman's best friend.
|
|
203
|
+
|
|
204
|
+
The victims of the fire could not have been more remarkable and sad to see. The victim wants to say that he has a young wife who loves him. It seems to him that his life has been entirely smothered by his work. He is on the verge of death and reaches for the moon.
|
|
205
|
+
|
|
206
|
+
The murder of Lady Godfrey does not make the heart race as much as anybody else. The murderer is the light sleeper and nothing less than Mr. Bingley will do. It is out of the question whether his own vanity has done much good for him. Is it rather subtler to say that some things are better than others? I like it pretty well and am willing to give it a try.
|
|
File without changes
|