glitchlings 0.4.5__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of glitchlings might be problematic. Click here for more details.

Files changed (53) hide show
  1. glitchlings/__init__.py +71 -0
  2. glitchlings/__main__.py +8 -0
  3. glitchlings/_zoo_rust.cp311-win_amd64.pyd +0 -0
  4. glitchlings/compat.py +282 -0
  5. glitchlings/config.py +386 -0
  6. glitchlings/config.toml +3 -0
  7. glitchlings/data/__init__.py +1 -0
  8. glitchlings/data/hokey_assets.json +193 -0
  9. glitchlings/dlc/__init__.py +7 -0
  10. glitchlings/dlc/_shared.py +153 -0
  11. glitchlings/dlc/huggingface.py +81 -0
  12. glitchlings/dlc/prime.py +254 -0
  13. glitchlings/dlc/pytorch.py +166 -0
  14. glitchlings/dlc/pytorch_lightning.py +209 -0
  15. glitchlings/lexicon/__init__.py +192 -0
  16. glitchlings/lexicon/_cache.py +108 -0
  17. glitchlings/lexicon/data/default_vector_cache.json +82 -0
  18. glitchlings/lexicon/metrics.py +162 -0
  19. glitchlings/lexicon/vector.py +652 -0
  20. glitchlings/lexicon/wordnet.py +228 -0
  21. glitchlings/main.py +364 -0
  22. glitchlings/util/__init__.py +195 -0
  23. glitchlings/util/adapters.py +27 -0
  24. glitchlings/util/hokey_generator.py +144 -0
  25. glitchlings/util/stretch_locator.py +140 -0
  26. glitchlings/util/stretchability.py +375 -0
  27. glitchlings/zoo/__init__.py +172 -0
  28. glitchlings/zoo/_ocr_confusions.py +32 -0
  29. glitchlings/zoo/_rate.py +131 -0
  30. glitchlings/zoo/_rust_extensions.py +143 -0
  31. glitchlings/zoo/_sampling.py +54 -0
  32. glitchlings/zoo/_text_utils.py +100 -0
  33. glitchlings/zoo/adjax.py +128 -0
  34. glitchlings/zoo/apostrofae.py +127 -0
  35. glitchlings/zoo/assets/__init__.py +0 -0
  36. glitchlings/zoo/assets/apostrofae_pairs.json +32 -0
  37. glitchlings/zoo/core.py +582 -0
  38. glitchlings/zoo/hokey.py +173 -0
  39. glitchlings/zoo/jargoyle.py +335 -0
  40. glitchlings/zoo/mim1c.py +109 -0
  41. glitchlings/zoo/ocr_confusions.tsv +30 -0
  42. glitchlings/zoo/redactyl.py +193 -0
  43. glitchlings/zoo/reduple.py +148 -0
  44. glitchlings/zoo/rushmore.py +153 -0
  45. glitchlings/zoo/scannequin.py +171 -0
  46. glitchlings/zoo/typogre.py +231 -0
  47. glitchlings/zoo/zeedub.py +185 -0
  48. glitchlings-0.4.5.dist-info/METADATA +648 -0
  49. glitchlings-0.4.5.dist-info/RECORD +53 -0
  50. glitchlings-0.4.5.dist-info/WHEEL +5 -0
  51. glitchlings-0.4.5.dist-info/entry_points.txt +2 -0
  52. glitchlings-0.4.5.dist-info/licenses/LICENSE +201 -0
  53. glitchlings-0.4.5.dist-info/top_level.txt +1 -0
@@ -0,0 +1,228 @@
1
+ """WordNet-backed lexicon implementation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from importlib import import_module
6
+ from pathlib import Path
7
+ from types import ModuleType
8
+ from typing import Any, Callable, Protocol, Sequence, cast
9
+
10
+ from ..compat import nltk as _nltk_dependency
11
+ from . import LexiconBackend
12
+ from ._cache import CacheSnapshot
13
+
14
+
15
class _LemmaProtocol(Protocol):
    """Structural type for lemma objects: anything exposing ``name()``."""

    def name(self) -> str:
        """Return the lemma text as a string."""
        ...
17
+
18
+
19
class _SynsetProtocol(Protocol):
    """Structural type for synset objects: anything exposing ``lemmas()``."""

    def lemmas(self) -> Sequence[_LemmaProtocol]:
        """Return the lemma objects that belong to this synset."""
        ...
21
+
22
+
23
class _WordNetResource(Protocol):
    """Structural type for the WordNet handle used by this module."""

    def synsets(self, word: str, pos: str | None = None) -> Sequence[_SynsetProtocol]:
        """Return the synsets for ``word``, optionally restricted to ``pos``."""
        ...

    def ensure_loaded(self) -> None:
        """Load the underlying corpus data (callers here expect ``LookupError`` on failure)."""
        ...
27
+
28
+
29
# Type of a callable that builds a WordNet resource from two positional arguments
# (it is invoked as ``WordNetCorpusReader(root, None)`` further down in this module).
WordNetCorpusReaderFactory = Callable[[Any, Any], _WordNetResource]

# Resolve the optional NLTK dependency through the compat layer.
# NOTE(review): ``.error`` is presumably the import failure recorded by the compat
# wrapper (``None`` when the import succeeded) -- confirm against ``glitchlings.compat``.
nltk: ModuleType | None = _nltk_dependency.get()
_NLTK_IMPORT_ERROR: ModuleNotFoundError | None = _nltk_dependency.error

# Module-level handles; each stays ``None`` unless the matching NLTK piece imports below.
WordNetCorpusReader: WordNetCorpusReaderFactory | None = None
find: Callable[[str], Any] | None = None
_WORDNET_MODULE: _WordNetResource | None = None

if nltk is not None:  # pragma: no cover - guarded by import success
    # Corpus reader class, used to build a reader straight from the corpus files.
    try:
        corpus_reader_module = import_module("nltk.corpus.reader")
    except ModuleNotFoundError as exc:  # pragma: no cover - triggered when corpus missing
        # Keep only the first recorded failure so error chaining points at the root cause.
        if _NLTK_IMPORT_ERROR is None:
            _NLTK_IMPORT_ERROR = exc
    else:
        reader_candidate = getattr(corpus_reader_module, "WordNetCorpusReader", None)
        if reader_candidate is not None:
            WordNetCorpusReader = cast(WordNetCorpusReaderFactory, reader_candidate)

    # Data locator (``nltk.data.find``), used to locate the corpus on disk.
    try:
        data_module = import_module("nltk.data")
    except ModuleNotFoundError as exc:  # pragma: no cover - triggered when data missing
        if _NLTK_IMPORT_ERROR is None:
            _NLTK_IMPORT_ERROR = exc
    else:
        locator = getattr(data_module, "find", None)
        if callable(locator):
            find = cast(Callable[[str], Any], locator)

    # Optional ready-made WordNet object; when the import fails the handle stays ``None``
    # and ``_wordnet()`` builds a reader through ``_load_wordnet_reader()`` instead.
    try:
        module_candidate = import_module("nltk.corpus.wordnet")
    except ModuleNotFoundError:  # pragma: no cover - only hit on namespace packages
        _WORDNET_MODULE = None
    else:
        _WORDNET_MODULE = cast(_WordNetResource, module_candidate)
else:
    # NLTK is absent: make sure every handle is explicitly unset.
    nltk = None
    find = None
    _WORDNET_MODULE = None

# Active WordNet handle (starts as the imported module, may later be replaced by a
# reader built on demand) and a flag recording that ``ensure_wordnet()`` has succeeded.
_WORDNET_HANDLE: _WordNetResource | None = _WORDNET_MODULE
_wordnet_ready = False

# Part-of-speech tags accepted by this backend.
_VALID_POS: tuple[str, ...] = ("n", "v", "a", "r")
74
+
75
+
76
def _require_nltk() -> None:
    """Raise ``RuntimeError`` unless both NLTK and its data locator are available.

    When an import failure was recorded at module load time it is attached as the
    cause of the raised error.
    """
    if nltk is not None and find is not None:
        return

    failure = RuntimeError(
        "The NLTK package is required for WordNet-backed lexicons; install "
        "`nltk` and its WordNet corpus manually to enable this backend."
    )
    # The global may be absent altogether, hence the defensive lookup.
    recorded = globals().get("_NLTK_IMPORT_ERROR")
    if recorded is None:
        raise failure
    raise failure from recorded
86
+
87
+
88
def dependencies_available() -> bool:
    """Report whether both the NLTK module and its data locator were imported."""
    missing = nltk is None or find is None
    return not missing
91
+
92
+
93
def _load_wordnet_reader() -> _WordNetResource:
    """Build a WordNet corpus reader from the locally installed corpus files.

    The unpacked ``corpora/wordnet`` directory is tried first, then the
    ``corpora/wordnet.zip`` archive.

    Raises:
        RuntimeError: If NLTK, the reader class, the data locator, or the corpus
            itself is unavailable.
    """
    _require_nltk()

    if WordNetCorpusReader is None:
        raise RuntimeError("The NLTK WordNet corpus reader is unavailable.")

    lookup = find
    if lookup is None:
        raise RuntimeError("The NLTK data locator is unavailable.")

    try:
        corpus_root = lookup("corpora/wordnet")
    except LookupError:
        # No unpacked directory; fall back to the zipped corpus.
        try:
            archive = lookup("corpora/wordnet.zip")
        except LookupError as exc:
            raise RuntimeError(
                "The NLTK WordNet corpus is not installed; run `nltk.download('wordnet')`."
            ) from exc
        corpus_root = archive.join("wordnet/")

    return WordNetCorpusReader(corpus_root, None)
116
+
117
+
118
def _wordnet(force_refresh: bool = False) -> _WordNetResource:
    """Return the active WordNet handle, creating it lazily.

    Args:
        force_refresh: When true, discard the current handle and start again from
            the module imported at load time (which may be ``None``).

    Returns:
        The cached handle, or a freshly built corpus reader when none is cached.
    """
    global _WORDNET_HANDLE

    if force_refresh:
        _WORDNET_HANDLE = _WORDNET_MODULE

    if _WORDNET_HANDLE is None:
        # Nothing cached: build a reader from the corpus files and remember it.
        _WORDNET_HANDLE = _load_wordnet_reader()

    return _WORDNET_HANDLE
132
+
133
+
134
def ensure_wordnet() -> None:
    """Make sure the WordNet corpus can be loaded, downloading it once if needed.

    A successful check is remembered, so later calls return immediately.

    Raises:
        RuntimeError: If NLTK is unavailable or the corpus still cannot be loaded
            after the download attempt.
    """
    global _wordnet_ready
    if _wordnet_ready:
        return

    _require_nltk()

    handle = _wordnet()
    toolkit = nltk
    if toolkit is None:
        raise RuntimeError("The NLTK dependency is unexpectedly unavailable.")

    try:
        handle.ensure_loaded()
    except LookupError:
        # Corpus missing: fetch it quietly, then retry with a refreshed handle.
        toolkit.download("wordnet", quiet=True)
        try:
            _wordnet(force_refresh=True).ensure_loaded()
        except LookupError as exc:  # pragma: no cover - only triggered when download fails
            raise RuntimeError("Unable to load NLTK WordNet corpus for synonym lookups.") from exc

    _wordnet_ready = True
158
+
159
+
160
def _collect_synonyms(word: str, parts_of_speech: tuple[str, ...]) -> list[str]:
    """Return the sorted synonyms from the first synset that offers any.

    Parts of speech are tried in the given order and synsets in the order WordNet
    returns them. Lemma underscores become spaces, and lemmas equal to ``word``
    (case-insensitively) are dropped. The search stops at the first synset that
    still has candidates left.
    """
    target = word.lower()
    corpus = _wordnet()

    for tag in parts_of_speech:
        for synset in corpus.synsets(word, pos=tag):
            readable = (lemma.name().replace("_", " ") for lemma in synset.lemmas())
            alternatives = {text for text in readable if text.lower() != target}
            if alternatives:
                return sorted(alternatives)

    return []
189
+
190
+
191
class WordNetLexicon(LexiconBackend):
    """Lexicon backend that looks synonyms up in the NLTK WordNet corpus."""

    def get_synonyms(self, word: str, pos: str | None = None, n: int = 5) -> list[str]:
        """Return up to ``n`` WordNet synonyms for ``word``.

        When ``pos`` is given only that part of speech is searched; an
        unrecognised ``pos`` yields an empty list.
        """
        ensure_wordnet()

        if pos is None:
            tags: tuple[str, ...] = _VALID_POS
        else:
            tag = pos.lower()
            if tag not in _VALID_POS:
                return []
            tags = (tag,)

        candidates = _collect_synonyms(word, tags)
        return self._deterministic_sample(candidates, limit=n, word=word, pos=pos)

    def supports_pos(self, pos: str | None) -> bool:
        """Return ``True`` when ``pos`` is ``None`` or one of the accepted tags."""
        return pos is None or pos.lower() in _VALID_POS

    @classmethod
    def load_cache(cls, path: str | Path) -> CacheSnapshot:
        """Always raise: this backend has no cache to load."""
        raise RuntimeError("WordNetLexicon does not persist or load caches.")

    def save_cache(self, path: str | Path | None = None) -> Path | None:
        """Always raise: this backend has no cache to save."""
        raise RuntimeError("WordNetLexicon does not persist or load caches.")

    def __repr__(self) -> str:  # pragma: no cover - trivial representation
        return "WordNetLexicon(seed={!r})".format(self.seed)
226
+
227
+
228
# Public names exported by this module.
__all__ = [
    "WordNetLexicon",
    "dependencies_available",
    "ensure_wordnet",
]
glitchlings/main.py ADDED
@@ -0,0 +1,364 @@
1
+ """Command line interface for summoning and running glitchlings."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import difflib
7
+ import sys
8
+ from collections.abc import Sequence
9
+ from pathlib import Path
10
+ from typing import cast
11
+
12
+ from . import SAMPLE_TEXT
13
+ from .config import DEFAULT_ATTACK_SEED, build_gaggle, load_attack_config
14
+ from .zoo import (
15
+ BUILTIN_GLITCHLINGS,
16
+ DEFAULT_GLITCHLING_NAMES,
17
+ Gaggle,
18
+ Glitchling,
19
+ parse_glitchling_spec,
20
+ summon,
21
+ )
22
+
23
# Width of the longest built-in glitchling name; used to right-align the listing.
MAX_NAME_WIDTH = max(map(len, (entry.name for entry in BUILTIN_GLITCHLINGS.values())))
24
+
25
+
26
def build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the main ``glitchlings`` command.

    Returns:
        argparse.ArgumentParser: Parser exposing the positional ``text`` argument
        plus the ``--glitchling``, ``--seed``, ``--file``, ``--sample``, ``--diff``,
        ``--list`` and ``--config`` options.

    """
    cli = argparse.ArgumentParser(
        description=(
            "Summon glitchlings to corrupt text. Provide input text as an argument, "
            "via --file, or pipe it on stdin."
        )
    )
    add = cli.add_argument

    # Input sources and glitchling selection.
    add(
        "text",
        nargs="?",
        help="Text to corrupt. If omitted, stdin is used or --sample provides fallback text.",
    )
    add(
        "-g",
        "--glitchling",
        dest="glitchlings",
        action="append",
        metavar="SPEC",
        help=(
            "Glitchling to apply, optionally with parameters like "
            "Typogre(rate=0.05). Repeat for multiples; defaults to all built-ins."
        ),
    )
    add(
        "-s",
        "--seed",
        type=int,
        default=None,
        help="Seed controlling deterministic corruption order (default: 151).",
    )
    add(
        "-f",
        "--file",
        type=Path,
        help="Read input text from a file instead of the command line argument.",
    )

    # Boolean switches.
    add(
        "--sample",
        action="store_true",
        help="Use the included SAMPLE_TEXT when no other input is provided.",
    )
    add(
        "--diff",
        action="store_true",
        help="Show a unified diff between the original and corrupted text.",
    )
    add("--list", action="store_true", help="List available glitchlings and exit.")

    add(
        "-c",
        "--config",
        type=Path,
        help="Load glitchlings from a YAML configuration file.",
    )
    return cli
90
+
91
+
92
def build_lexicon_parser() -> argparse.ArgumentParser:
    """Build the parser for the ``build-lexicon`` subcommand.

    ``--source`` and ``--output`` are required; every other option is optional.
    """
    lexicon_cli = argparse.ArgumentParser(
        prog="glitchlings build-lexicon",
        description=(
            "Generate deterministic synonym caches using vector embeddings so "
            "they can be distributed without bundling large models."
        ),
    )
    add = lexicon_cli.add_argument

    # Required inputs.
    add(
        "--source",
        required=True,
        help=(
            "Vector source specification. Use 'spacy:<model>' for spaCy pipelines "
            "or provide a path to a gensim KeyedVectors/word2vec file."
        ),
    )
    add(
        "--output",
        required=True,
        type=Path,
        help="Path to the JSON file that will receive the synonym cache.",
    )

    # Optional tuning knobs.
    add(
        "--tokens",
        type=Path,
        help="Optional newline-delimited vocabulary file to restrict generation.",
    )
    add(
        "--max-neighbors",
        type=int,
        default=50,
        help="Number of nearest neighbours to cache per token (default: 50).",
    )
    add(
        "--min-similarity",
        type=float,
        default=0.0,
        help="Minimum cosine similarity required to keep a synonym (default: 0.0).",
    )
    add(
        "--seed",
        type=int,
        help="Optional deterministic seed to bake into the resulting cache.",
    )
    add(
        "--case-sensitive",
        action="store_true",
        help="Preserve original casing instead of lower-casing cache keys.",
    )
    add(
        "--normalizer",
        choices=["lower", "identity"],
        default="lower",
        help="Token normalization strategy for cache keys (default: lower).",
    )
    add("--limit", type=int, help="Optional maximum number of tokens to process.")
    add("--overwrite", action="store_true", help="Allow overwriting an existing cache file.")
    return lexicon_cli
159
+
160
+
161
def list_glitchlings() -> None:
    """Print one line per default glitchling: its name, scope and order.

    Names are right-aligned to ``MAX_NAME_WIDTH`` so the columns line up.
    """
    for key in DEFAULT_GLITCHLING_NAMES:
        entry = BUILTIN_GLITCHLINGS[key]
        print(
            f"{entry.name:>{MAX_NAME_WIDTH}} — "
            f"scope: {entry.level.name.title()}, order: {entry.order.name.lower()}"
        )
169
+
170
+
171
+ def read_text(args: argparse.Namespace, parser: argparse.ArgumentParser) -> str:
172
+ """Resolve the input text based on CLI arguments.
173
+
174
+ Args:
175
+ args: Parsed arguments from the CLI.
176
+ parser: The argument parser used for emitting user-facing errors.
177
+
178
+ Returns:
179
+ str: The text to corrupt.
180
+
181
+ Raises:
182
+ SystemExit: Raised indirectly via ``parser.error`` on failure.
183
+
184
+ """
185
+ file_path = cast(Path | None, getattr(args, "file", None))
186
+ if file_path is not None:
187
+ try:
188
+ return file_path.read_text(encoding="utf-8")
189
+ except OSError as exc:
190
+ filename = getattr(exc, "filename", None) or file_path
191
+ reason = exc.strerror or str(exc)
192
+ parser.error(f"Failed to read file {filename}: {reason}")
193
+
194
+ text_argument = cast(str | None, getattr(args, "text", None))
195
+ if text_argument:
196
+ return text_argument
197
+
198
+ if not sys.stdin.isatty():
199
+ return sys.stdin.read()
200
+
201
+ if bool(getattr(args, "sample", False)):
202
+ return SAMPLE_TEXT
203
+
204
+ parser.error(
205
+ "No input text provided. Supply text as an argument, use --file, pipe input, or "
206
+ "pass --sample."
207
+ )
208
+ raise AssertionError("parser.error should exit")
209
+
210
+
211
def summon_glitchlings(
    names: list[str] | None,
    parser: argparse.ArgumentParser,
    seed: int | None,
    *,
    config_path: Path | None = None,
) -> Gaggle:
    """Build the ``Gaggle`` requested on the command line.

    With ``config_path`` the gaggle comes from the configuration file and ``seed``
    is passed on as an override. Otherwise ``names`` (or the default roster when
    empty) is summoned with ``seed``, falling back to ``DEFAULT_ATTACK_SEED``.
    Invalid input is reported through ``parser.error``.
    """
    if config_path is not None:
        if names:
            parser.error("Cannot combine --config with --glitchling.")
            raise AssertionError("parser.error should exit")

        try:
            attack_config = load_attack_config(config_path)
        except (TypeError, ValueError) as exc:
            parser.error(str(exc))
            raise AssertionError("parser.error should exit")
        else:
            return build_gaggle(attack_config, seed_override=seed)

    roster: Sequence[str | Glitchling]
    if not names:
        roster = list(DEFAULT_GLITCHLING_NAMES)
    else:
        try:
            # The first invalid specification aborts parsing.
            roster = [parse_glitchling_spec(spec) for spec in names]
        except ValueError as exc:
            parser.error(str(exc))
            raise AssertionError("parser.error should exit")

    chosen_seed = DEFAULT_ATTACK_SEED if seed is None else seed

    try:
        return summon(list(roster), seed=chosen_seed)
    except ValueError as exc:
        parser.error(str(exc))
        raise AssertionError("parser.error should exit")
252
+
253
+
254
def show_diff(original: str, corrupted: str) -> None:
    """Print a unified diff between the original and corrupted text.

    Prints ``No changes detected.`` when the two texts are identical.

    Args:
        original: Text before corruption.
        corrupted: Text after corruption.
    """
    diff_lines = list(
        difflib.unified_diff(
            original.splitlines(keepends=True),
            corrupted.splitlines(keepends=True),
            fromfile="original",
            tofile="corrupted",
            lineterm="",
        )
    )
    if not diff_lines:
        print("No changes detected.")
        return

    for line in diff_lines:
        # Content lines keep their own newline (``keepends=True``) while the headers
        # carry none (``lineterm=""``); only add a newline where one is missing so
        # the output is not double-spaced.
        print(line, end="" if line.endswith("\n") else "\n")
270
+
271
+
272
def run_cli(args: argparse.Namespace, parser: argparse.ArgumentParser) -> int:
    """Run the main CLI workflow for already-parsed arguments.

    Args:
        args: Parsed CLI arguments.
        parser: Argument parser used for error reporting.

    Returns:
        int: Exit code for the process (``0`` on success).

    Raises:
        TypeError: If the gaggle returns something other than a string.

    """
    if args.list:
        list_glitchlings()
        return 0

    source_text = read_text(args, parser)
    flock = summon_glitchlings(
        args.glitchlings,
        parser,
        args.seed,
        config_path=args.config,
    )

    result = flock.corrupt(source_text)
    if not isinstance(result, str):
        raise TypeError("Gaggle returned non-string output for string input")

    # Either show what changed or emit the corrupted text itself.
    if args.diff:
        show_diff(source_text, result)
    else:
        print(result)
    return 0
306
+
307
+
308
def run_build_lexicon(args: argparse.Namespace) -> int:
    """Translate parsed ``build-lexicon`` arguments into a vector builder call.

    The options are turned back into an argument list and handed to
    ``glitchlings.lexicon.vector.main``, whose return value is passed through.
    """
    # Imported here rather than at module import time.
    from glitchlings.lexicon.vector import main as vector_main

    # Options that are always forwarded.
    forwarded = ["--source", args.source]
    forwarded += ["--output", str(args.output)]
    forwarded += ["--max-neighbors", str(args.max_neighbors)]
    forwarded += ["--min-similarity", str(args.min_similarity)]
    forwarded += ["--normalizer", args.normalizer]

    # Optional values and switches, forwarded only when set.
    if args.tokens is not None:
        forwarded += ["--tokens", str(args.tokens)]
    if args.seed is not None:
        forwarded += ["--seed", str(args.seed)]
    if args.case_sensitive:
        forwarded += ["--case-sensitive"]
    if args.limit is not None:
        forwarded += ["--limit", str(args.limit)]
    if args.overwrite:
        forwarded += ["--overwrite"]

    return vector_main(forwarded)
336
+
337
+
338
def main(argv: list[str] | None = None) -> int:
    """Entry point for the ``glitchlings`` command line interface.

    A leading ``build-lexicon`` argument routes to the lexicon builder; anything
    else is handled by the main corruption CLI.

    Args:
        argv: Optional list of command line arguments. Defaults to ``sys.argv``.

    Returns:
        int: Exit code suitable for use with ``sys.exit``.

    """
    raw_args = sys.argv[1:] if argv is None else list(argv)

    if raw_args[:1] == ["build-lexicon"]:
        # Strip the subcommand name before handing over to its parser.
        lexicon_args = build_lexicon_parser().parse_args(raw_args[1:])
        return run_build_lexicon(lexicon_args)

    parser = build_parser()
    return run_cli(parser.parse_args(raw_args), parser)
361
+
362
+
363
# Script entry point: exit the process with the CLI's return code.
if __name__ == "__main__":
    raise SystemExit(main())