fast-sentence-segment 1.4.2__py3-none-any.whl → 1.4.3__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -143,34 +143,50 @@ def file_main():
143
143
  )
144
144
  args = parser.parse_args()
145
145
 
146
+ # Echo command immediately
146
147
  _header("segment-file")
148
+ print(f" {DIM}Segmenting text file into sentences{RESET}")
149
+ print()
150
+
151
+ # Show configuration
147
152
  _param("Input", args.input_file)
148
153
  _param("Output", args.output_file)
149
154
  _param("Size", _file_size(args.input_file))
150
155
  _param("Unwrap", "enabled" if args.unwrap else "disabled")
151
156
  _param("Normalize quotes", "disabled" if args.no_normalize_quotes else "enabled")
157
+ print()
152
158
 
159
+ # Step 1: Read file
160
+ print(f" {YELLOW}→{RESET} Reading input file...")
153
161
  with open(args.input_file, "r", encoding="utf-8") as f:
154
162
  text = f.read()
163
+ print(f" {GREEN}✓{RESET} Read {len(text):,} characters")
155
164
 
165
+ # Step 2: Segment text
166
+ print(f" {YELLOW}→{RESET} Segmenting text...", end="", flush=True)
156
167
  start = time.perf_counter()
157
168
  normalize = not args.no_normalize_quotes
158
- with Spinner("Segmenting text..."):
159
- sentences = segment_text(
160
- text.strip(), flatten=True, unwrap=args.unwrap, normalize=normalize,
161
- )
169
+ sentences = segment_text(
170
+ text.strip(), flatten=True, unwrap=args.unwrap, normalize=normalize,
171
+ )
162
172
  elapsed = time.perf_counter() - start
163
-
164
- with Spinner("Writing output..."):
165
- with open(args.output_file, "w", encoding="utf-8") as f:
166
- if args.unwrap:
167
- f.write(format_grouped_sentences(sentences) + "\n")
168
- else:
169
- for sentence in sentences:
170
- f.write(sentence + "\n")
171
-
172
- _done(f"{len(sentences):,} sentences in {elapsed:.2f}s")
173
- _done(f"Written to {args.output_file}")
173
+ print(f"\r {GREEN}✓{RESET} Segmented into {len(sentences):,} sentences ({elapsed:.2f}s)")
174
+
175
+ # Step 3: Write output
176
+ total = len(sentences)
177
+ with open(args.output_file, "w", encoding="utf-8") as f:
178
+ if args.unwrap:
179
+ f.write(format_grouped_sentences(sentences) + "\n")
180
+ print(f" {GREEN}✓{RESET} Written {total:,} sentences to {args.output_file}")
181
+ else:
182
+ for i, sentence in enumerate(sentences, 1):
183
+ f.write(sentence + "\n")
184
+ if i % 500 == 0 or i == total:
185
+ pct = (i / total) * 100
186
+ print(f"\r {YELLOW}→{RESET} Writing... {pct:.0f}% ({i:,}/{total:,})", end="", flush=True)
187
+ print(f"\r {GREEN}✓{RESET} Written {total:,} sentences to {args.output_file} ")
188
+
189
+ print(f"\n {GREEN}Done!{RESET}")
174
190
  print()
175
191
 
176
192
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: fast-sentence-segment
3
- Version: 1.4.2
3
+ Version: 1.4.3
4
4
  Summary: Fast and Efficient Sentence Segmentation
5
5
  Home-page: https://github.com/craigtrim/fast-sentence-segment
6
6
  License: MIT
@@ -33,7 +33,6 @@ Description-Content-Type: text/markdown
33
33
 
34
34
  [![PyPI version](https://img.shields.io/pypi/v/fast-sentence-segment.svg)](https://pypi.org/project/fast-sentence-segment/)
35
35
  [![Python versions](https://img.shields.io/pypi/pyversions/fast-sentence-segment.svg)](https://pypi.org/project/fast-sentence-segment/)
36
- [![CI](https://img.shields.io/github/actions/workflow/status/craigtrim/fast-sentence-segment/ci.yml?branch=master&label=CI)](https://github.com/craigtrim/fast-sentence-segment/actions/workflows/ci.yml)
37
36
  [![Tests](https://img.shields.io/badge/tests-664-brightgreen)](https://github.com/craigtrim/fast-sentence-segment/tree/master/tests)
38
37
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
39
38
  [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
@@ -1,7 +1,7 @@
1
1
  fast_sentence_segment/__init__.py,sha256=jeb4yCy89ivyqbo-4ldJLquPAG_XR_33Q7nrDjqPxvE,1465
2
2
  fast_sentence_segment/bp/__init__.py,sha256=j2-WfQ9WwVuXeGSjvV6XLVwEdvau8sdAQe4Pa4DrYi8,33
3
3
  fast_sentence_segment/bp/segmenter.py,sha256=UW6DguPgA56h-pPYRsfJhjIzBe40j6NdjkwYxamASyA,1928
4
- fast_sentence_segment/cli.py,sha256=I5tLOnojPJLc-S3VHwQdSFON9DcuTjilwFRfwEpVKag,4866
4
+ fast_sentence_segment/cli.py,sha256=vr1Gh-pq4bIPcnhUF6c7ckGdEfoyrI_r0XcrJrIfjEA,5640
5
5
  fast_sentence_segment/core/__init__.py,sha256=uoBersYyVStJ5a8zJpQz1GDGaloEdAv2jGHw1292hRM,108
6
6
  fast_sentence_segment/core/base_object.py,sha256=AYr7yzusIwawjbKdvcv4yTEnhmx6M583kDZzhzPOmq4,635
7
7
  fast_sentence_segment/core/stopwatch.py,sha256=hE6hMz2q6rduaKi58KZmiAL-lRtyh_wWCANhl4KLkRQ,879
@@ -25,8 +25,8 @@ fast_sentence_segment/dmo/unwrap_hard_wrapped_text.py,sha256=V1T5RsJBaII_iGJMyWv
25
25
  fast_sentence_segment/svc/__init__.py,sha256=9B12mXxBnlalH4OAm1AMLwUMa-RLi2ilv7qhqv26q7g,144
26
26
  fast_sentence_segment/svc/perform_paragraph_segmentation.py,sha256=zLKw9rSzb0NNfx4MyEeoGrHwhxTtH5oDrYcAL2LMVHY,1378
27
27
  fast_sentence_segment/svc/perform_sentence_segmentation.py,sha256=mAJEPWqNQFbnlj7Rb7yiXIRHCAdlgsN0jAbg7e2qpMU,7421
28
- fast_sentence_segment-1.4.2.dist-info/LICENSE,sha256=vou5JCLAT5nHcsUv-AkjUYAihYfN9mwPDXxV2DHyHBo,1067
29
- fast_sentence_segment-1.4.2.dist-info/METADATA,sha256=mUMWyQu_ec1Ugni5zFIHjUbMm4FYCypA1ws_NFhoZhM,7987
30
- fast_sentence_segment-1.4.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
31
- fast_sentence_segment-1.4.2.dist-info/entry_points.txt,sha256=Zc8OwFKj3ofnjy5ZIFqHzDkIEWweV1AP1xap1ZFGD8M,107
32
- fast_sentence_segment-1.4.2.dist-info/RECORD,,
28
+ fast_sentence_segment-1.4.3.dist-info/LICENSE,sha256=vou5JCLAT5nHcsUv-AkjUYAihYfN9mwPDXxV2DHyHBo,1067
29
+ fast_sentence_segment-1.4.3.dist-info/METADATA,sha256=5LGK9z9ip2AtOr2FgaIgkrR2mLvIQaeeuh8gVi3GBaA,7785
30
+ fast_sentence_segment-1.4.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
31
+ fast_sentence_segment-1.4.3.dist-info/entry_points.txt,sha256=Zc8OwFKj3ofnjy5ZIFqHzDkIEWweV1AP1xap1ZFGD8M,107
32
+ fast_sentence_segment-1.4.3.dist-info/RECORD,,