fast-sentence-segment 1.2.0__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fast_sentence_segment-1.2.0.dist-info → fast_sentence_segment-1.2.1.dist-info}/METADATA +55 -17
- {fast_sentence_segment-1.2.0.dist-info → fast_sentence_segment-1.2.1.dist-info}/RECORD +5 -5
- {fast_sentence_segment-1.2.0.dist-info → fast_sentence_segment-1.2.1.dist-info}/WHEEL +0 -0
- {fast_sentence_segment-1.2.0.dist-info → fast_sentence_segment-1.2.1.dist-info}/entry_points.txt +0 -0
- {fast_sentence_segment-1.2.0.dist-info → fast_sentence_segment-1.2.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fast-sentence-segment
|
|
3
|
-
Version: 1.2.0
|
|
3
|
+
Version: 1.2.1
|
|
4
4
|
Summary: Fast and Efficient Sentence Segmentation
|
|
5
5
|
License: MIT
|
|
6
6
|
License-File: LICENSE
|
|
@@ -67,12 +67,21 @@ python -m spacy download en_core_web_sm
|
|
|
67
67
|
```python
|
|
68
68
|
from fast_sentence_segment import segment_text
|
|
69
69
|
|
|
70
|
-
text = "
|
|
70
|
+
text = "Do you like Dr. Who? I prefer Dr. Strange! Mr. T is also cool."
|
|
71
71
|
|
|
72
|
-
results = segment_text(text)
|
|
73
|
-
# Returns: [['Here is a Dr. who says something.', 'And then again, what else?', "I don't know.", 'Do you?']]
|
|
72
|
+
results = segment_text(text, flatten=True)
|
|
74
73
|
```
|
|
75
74
|
|
|
75
|
+
```json
|
|
76
|
+
[
|
|
77
|
+
"Do you like Dr. Who?",
|
|
78
|
+
"I prefer Dr. Strange!",
|
|
79
|
+
"Mr. T is also cool."
|
|
80
|
+
]
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Notice how "Dr. Who?" stays together within a single sentence—the library correctly recognizes that a title followed by a single-word name ending in `?` or `!` is a name reference, so the period after the title is not treated as a sentence boundary.
|
|
84
|
+
|
|
76
85
|
## Usage
|
|
77
86
|
|
|
78
87
|
### Basic Segmentation
|
|
@@ -82,16 +91,24 @@ The `segment_text` function returns a list of lists, where each inner list repre
|
|
|
82
91
|
```python
|
|
83
92
|
from fast_sentence_segment import segment_text
|
|
84
93
|
|
|
85
|
-
text = """
|
|
94
|
+
text = """Gandalf spoke softly. "All we have to decide is what to do with the time given us."
|
|
86
95
|
|
|
87
|
-
|
|
96
|
+
Frodo nodded. The weight of the Ring pressed against his chest."""
|
|
88
97
|
|
|
89
98
|
results = segment_text(text)
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
```json
|
|
102
|
+
[
|
|
103
|
+
[
|
|
104
|
+
"Gandalf spoke softly.",
|
|
105
|
+
"\"All we have to decide is what to do with the time given us.\"."
|
|
106
|
+
],
|
|
107
|
+
[
|
|
108
|
+
"Frodo nodded.",
|
|
109
|
+
"The weight of the Ring pressed against his chest."
|
|
110
|
+
]
|
|
111
|
+
]
|
|
95
112
|
```
|
|
96
113
|
|
|
97
114
|
### Flattened Output
|
|
@@ -99,8 +116,17 @@ results = segment_text(text)
|
|
|
99
116
|
If you don't need paragraph boundaries, use the `flatten` parameter:
|
|
100
117
|
|
|
101
118
|
```python
|
|
119
|
+
text = "At 9 a.m. the hobbits set out. By 3 p.m. they reached Rivendell. Mr. Frodo was exhausted."
|
|
120
|
+
|
|
102
121
|
results = segment_text(text, flatten=True)
|
|
103
|
-
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
```json
|
|
125
|
+
[
|
|
126
|
+
"At 9 a.m. the hobbits set out.",
|
|
127
|
+
"By 3 p.m. they reached Rivendell.",
|
|
128
|
+
"Mr. Frodo was exhausted."
|
|
129
|
+
]
|
|
104
130
|
```
|
|
105
131
|
|
|
106
132
|
### Direct Segmenter Access
|
|
@@ -120,16 +146,28 @@ Segment text directly from the terminal:
|
|
|
120
146
|
|
|
121
147
|
```bash
|
|
122
148
|
# Direct text input
|
|
123
|
-
|
|
149
|
+
echo "Have you seen Dr. Who? It's brilliant!" | segment
|
|
150
|
+
```
|
|
124
151
|
|
|
152
|
+
```
|
|
153
|
+
Have you seen Dr. Who?
|
|
154
|
+
It's brilliant!
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
```bash
|
|
125
158
|
# Numbered output
|
|
126
|
-
segment -n "
|
|
159
|
+
segment -n "Gandalf paused... You shall not pass! The Balrog roared."
|
|
160
|
+
```
|
|
127
161
|
|
|
128
|
-
|
|
129
|
-
|
|
162
|
+
```
|
|
163
|
+
1. Gandalf paused...
|
|
164
|
+
2. You shall not pass!
|
|
165
|
+
3. The Balrog roared.
|
|
166
|
+
```
|
|
130
167
|
|
|
168
|
+
```bash
|
|
131
169
|
# From file
|
|
132
|
-
segment -f
|
|
170
|
+
segment -f silmarillion.txt
|
|
133
171
|
```
|
|
134
172
|
|
|
135
173
|
## API Reference
|
|
@@ -20,8 +20,8 @@ fast_sentence_segment/dmo/title_name_merger.py,sha256=zbG04_VjwM8TtT8LhavvmZqIZL
|
|
|
20
20
|
fast_sentence_segment/svc/__init__.py,sha256=9B12mXxBnlalH4OAm1AMLwUMa-RLi2ilv7qhqv26q7g,144
|
|
21
21
|
fast_sentence_segment/svc/perform_paragraph_segmentation.py,sha256=zLKw9rSzb0NNfx4MyEeoGrHwhxTtH5oDrYcAL2LMVHY,1378
|
|
22
22
|
fast_sentence_segment/svc/perform_sentence_segmentation.py,sha256=dqGxFsJoP6ox_MJwtB85R9avEbBAR4x9YKaRaQ5fAXo,5723
|
|
23
|
-
fast_sentence_segment-1.2.
|
|
24
|
-
fast_sentence_segment-1.2.
|
|
25
|
-
fast_sentence_segment-1.2.
|
|
26
|
-
fast_sentence_segment-1.2.
|
|
27
|
-
fast_sentence_segment-1.2.
|
|
23
|
+
fast_sentence_segment-1.2.1.dist-info/METADATA,sha256=OsUlH-UhmI6fw-ChvsF83G_WwTXBlhZPINo243CaziQ,6889
|
|
24
|
+
fast_sentence_segment-1.2.1.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
|
|
25
|
+
fast_sentence_segment-1.2.1.dist-info/entry_points.txt,sha256=mDiRuKOZlOeqmtH1eZwqGEGM6KUh0RTzwyETGMpxSDI,58
|
|
26
|
+
fast_sentence_segment-1.2.1.dist-info/licenses/LICENSE,sha256=vou5JCLAT5nHcsUv-AkjUYAihYfN9mwPDXxV2DHyHBo,1067
|
|
27
|
+
fast_sentence_segment-1.2.1.dist-info/RECORD,,
|
|
File without changes
|
{fast_sentence_segment-1.2.0.dist-info → fast_sentence_segment-1.2.1.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{fast_sentence_segment-1.2.0.dist-info → fast_sentence_segment-1.2.1.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|