play-parser 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. play_parser-1.0.0/CHANGELOG.md +16 -0
  2. play_parser-1.0.0/LICENSE +21 -0
  3. play_parser-1.0.0/MANIFEST.in +5 -0
  4. play_parser-1.0.0/PKG-INFO +195 -0
  5. play_parser-1.0.0/README.md +162 -0
  6. play_parser-1.0.0/RELEASE.md +62 -0
  7. play_parser-1.0.0/SECURITY.md +11 -0
  8. play_parser-1.0.0/data/README.md +7 -0
  9. play_parser-1.0.0/data/json/optimal/A Midsummer Night's Dream.json +5008 -0
  10. play_parser-1.0.0/data/json/optimal/All's Well That Ends Well.json +8665 -0
  11. play_parser-1.0.0/data/json/optimal/Antony and Cleopatra.json +11638 -0
  12. play_parser-1.0.0/data/json/optimal/As You Like It.json +7585 -0
  13. play_parser-1.0.0/data/json/optimal/Coriolanus.json +10572 -0
  14. play_parser-1.0.0/data/json/optimal/Cymbeline.json +8624 -0
  15. play_parser-1.0.0/data/json/optimal/Hamlet.json +11084 -0
  16. play_parser-1.0.0/data/json/optimal/Henry IV, Part 1.json +7418 -0
  17. play_parser-1.0.0/data/json/optimal/Henry IV, Part 2.json +8115 -0
  18. play_parser-1.0.0/data/json/optimal/Henry V.json +6956 -0
  19. play_parser-1.0.0/data/json/optimal/Henry VI, Part 1.json +6730 -0
  20. play_parser-1.0.0/data/json/optimal/Henry VI, Part 2.json +7961 -0
  21. play_parser-1.0.0/data/json/optimal/Henry VI, Part 3.json +7962 -0
  22. play_parser-1.0.0/data/json/optimal/Henry VIII.json +6823 -0
  23. play_parser-1.0.0/data/json/optimal/Julius Caesar.json +7722 -0
  24. play_parser-1.0.0/data/json/optimal/King John.json +5252 -0
  25. play_parser-1.0.0/data/json/optimal/King Lear.json +10634 -0
  26. play_parser-1.0.0/data/json/optimal/Love's Labour's Lost.json +9496 -0
  27. play_parser-1.0.0/data/json/optimal/Macbeth.json +6730 -0
  28. play_parser-1.0.0/data/json/optimal/Measure for Measure.json +8557 -0
  29. play_parser-1.0.0/data/json/optimal/Much Ado About Nothing.json +8691 -0
  30. play_parser-1.0.0/data/json/optimal/Othello.json +11014 -0
  31. play_parser-1.0.0/data/json/optimal/Pericles, Prince of Tyre.json +6400 -0
  32. play_parser-1.0.0/data/json/optimal/README.md +7 -0
  33. play_parser-1.0.0/data/json/optimal/Richard II.json +5422 -0
  34. play_parser-1.0.0/data/json/optimal/Richard III.json +10683 -0
  35. play_parser-1.0.0/data/json/optimal/Romeo and Juliet.json +8273 -0
  36. play_parser-1.0.0/data/json/optimal/The Comedy of Errors.json +5568 -0
  37. play_parser-1.0.0/data/json/optimal/The Importance of Being Earnest.json +6213 -0
  38. play_parser-1.0.0/data/json/optimal/The Merchant of Venice.json +6115 -0
  39. play_parser-1.0.0/data/json/optimal/The Merry Wives of Windsor.json +9801 -0
  40. play_parser-1.0.0/data/json/optimal/The Taming of the Shrew.json +8497 -0
  41. play_parser-1.0.0/data/json/optimal/The Tempest.json +6235 -0
  42. play_parser-1.0.0/data/json/optimal/The Winter's Tale.json +7037 -0
  43. play_parser-1.0.0/data/json/optimal/Timon of Athens.json +7379 -0
  44. play_parser-1.0.0/data/json/optimal/Titus Andronicus.json +5876 -0
  45. play_parser-1.0.0/data/json/optimal/Troilus and Cressida.json +7458 -0
  46. play_parser-1.0.0/data/json/optimal/Twelfth Night.json +8647 -0
  47. play_parser-1.0.0/data/json/optimal/Two Gentlemen of Verona.json +7751 -0
  48. play_parser-1.0.0/data/text/raw/A Midsummer Night's Dream.txt +2806 -0
  49. play_parser-1.0.0/data/text/raw/All's Well That Ends Well.txt +4026 -0
  50. play_parser-1.0.0/data/text/raw/Antony and Cleopatra.txt +5042 -0
  51. play_parser-1.0.0/data/text/raw/As You Like It.txt +3639 -0
  52. play_parser-1.0.0/data/text/raw/Coriolanus.txt +5103 -0
  53. play_parser-1.0.0/data/text/raw/Cymbeline.txt +4823 -0
  54. play_parser-1.0.0/data/text/raw/Hamlet.txt +5387 -0
  55. play_parser-1.0.0/data/text/raw/Henry IV, Part 1.txt +3988 -0
  56. play_parser-1.0.0/data/text/raw/Henry IV, Part 2.txt +4268 -0
  57. play_parser-1.0.0/data/text/raw/Henry V.txt +4132 -0
  58. play_parser-1.0.0/data/text/raw/Henry VI, Part 1.txt +3636 -0
  59. play_parser-1.0.0/data/text/raw/Henry VI, Part 2.txt +4131 -0
  60. play_parser-1.0.0/data/text/raw/Henry VI, Part 3.txt +3957 -0
  61. play_parser-1.0.0/data/text/raw/Henry VIII.txt +4095 -0
  62. play_parser-1.0.0/data/text/raw/Julius Caesar.txt +3574 -0
  63. play_parser-1.0.0/data/text/raw/King John.txt +3323 -0
  64. play_parser-1.0.0/data/text/raw/King Lear.txt +4839 -0
  65. play_parser-1.0.0/data/text/raw/Love's Labour's Lost.txt +4038 -0
  66. play_parser-1.0.0/data/text/raw/Macbeth.txt +3242 -0
  67. play_parser-1.0.0/data/text/raw/Measure for Measure.txt +3896 -0
  68. play_parser-1.0.0/data/text/raw/Much Ado About Nothing.txt +3689 -0
  69. play_parser-1.0.0/data/text/raw/Othello.txt +4883 -0
  70. play_parser-1.0.0/data/text/raw/Pericles, Prince of Tyre.txt +3228 -0
  71. play_parser-1.0.0/data/text/raw/Richard II.txt +3498 -0
  72. play_parser-1.0.0/data/text/raw/Richard III.txt +5037 -0
  73. play_parser-1.0.0/data/text/raw/Romeo and Juliet.txt +4143 -0
  74. play_parser-1.0.0/data/text/raw/The Comedy of Errors.txt +2666 -0
  75. play_parser-1.0.0/data/text/raw/The Importance of Being Earnest.txt +3841 -0
  76. play_parser-1.0.0/data/text/raw/The Merchant of Venice.txt +3445 -0
  77. play_parser-1.0.0/data/text/raw/The Merry Wives of Windsor.txt +3856 -0
  78. play_parser-1.0.0/data/text/raw/The Taming of the Shrew.txt +3705 -0
  79. play_parser-1.0.0/data/text/raw/The Tempest.txt +3049 -0
  80. play_parser-1.0.0/data/text/raw/The Winter's Tale.txt +4240 -0
  81. play_parser-1.0.0/data/text/raw/Timon of Athens.txt +3381 -0
  82. play_parser-1.0.0/data/text/raw/Titus Andronicus.txt +3298 -0
  83. play_parser-1.0.0/data/text/raw/Troilus and Cressida.txt +3539 -0
  84. play_parser-1.0.0/data/text/raw/Twelfth Night.txt +3578 -0
  85. play_parser-1.0.0/data/text/raw/Two Gentlemen of Verona.txt +3221 -0
  86. play_parser-1.0.0/docs/API.md +207 -0
  87. play_parser-1.0.0/docs/FORMAT_PROFILES.md +196 -0
  88. play_parser-1.0.0/docs/JSON_SCHEMA.md +150 -0
  89. play_parser-1.0.0/docs/play_document.schema.json +241 -0
  90. play_parser-1.0.0/pyproject.toml +69 -0
  91. play_parser-1.0.0/setup.cfg +4 -0
  92. play_parser-1.0.0/src/play_parser/__init__.py +32 -0
  93. play_parser-1.0.0/src/play_parser/__main__.py +6 -0
  94. play_parser-1.0.0/src/play_parser/_io.py +51 -0
  95. play_parser-1.0.0/src/play_parser/cli/__init__.py +3 -0
  96. play_parser-1.0.0/src/play_parser/cli/main.py +240 -0
  97. play_parser-1.0.0/src/play_parser/document/__init__.py +21 -0
  98. play_parser-1.0.0/src/play_parser/document/assembler.py +185 -0
  99. play_parser-1.0.0/src/play_parser/document/builder.py +170 -0
  100. play_parser-1.0.0/src/play_parser/document/constants.py +17 -0
  101. play_parser-1.0.0/src/play_parser/document/text.py +15 -0
  102. play_parser-1.0.0/src/play_parser/document/types.py +92 -0
  103. play_parser-1.0.0/src/play_parser/document/validation.py +250 -0
  104. play_parser-1.0.0/src/play_parser/domain/__init__.py +11 -0
  105. play_parser-1.0.0/src/play_parser/domain/play.py +743 -0
  106. play_parser-1.0.0/src/play_parser/ingestion/__init__.py +3 -0
  107. play_parser-1.0.0/src/play_parser/ingestion/ingestor.py +181 -0
  108. play_parser-1.0.0/src/play_parser/parsing/__init__.py +18 -0
  109. play_parser-1.0.0/src/play_parser/parsing/context.py +103 -0
  110. play_parser-1.0.0/src/play_parser/parsing/front_matter.py +86 -0
  111. play_parser-1.0.0/src/play_parser/parsing/parser.py +292 -0
  112. play_parser-1.0.0/src/play_parser/parsing/profiles/__init__.py +15 -0
  113. play_parser-1.0.0/src/play_parser/parsing/profiles/builtins/__init__.py +1 -0
  114. play_parser-1.0.0/src/play_parser/parsing/profiles/builtins/colon_inline.json +15 -0
  115. play_parser-1.0.0/src/play_parser/parsing/profiles/builtins/dot_block.json +16 -0
  116. play_parser-1.0.0/src/play_parser/parsing/profiles/builtins/dot_inline.json +16 -0
  117. play_parser-1.0.0/src/play_parser/parsing/profiles/builtins/mixed_parenthetical.json +17 -0
  118. play_parser-1.0.0/src/play_parser/parsing/profiles/builtins/narrative_stage_heavy.json +17 -0
  119. play_parser-1.0.0/src/play_parser/parsing/profiles/loader.py +149 -0
  120. play_parser-1.0.0/src/play_parser/parsing/profiles/schema.py +139 -0
  121. play_parser-1.0.0/src/play_parser/parsing/speakers.py +130 -0
  122. play_parser-1.0.0/src/play_parser/parsing/speech.py +604 -0
  123. play_parser-1.0.0/src/play_parser/parsing/stage.py +178 -0
  124. play_parser-1.0.0/src/play_parser/parsing/structure.py +87 -0
  125. play_parser-1.0.0/src/play_parser/py.typed +0 -0
  126. play_parser-1.0.0/src/play_parser.egg-info/PKG-INFO +195 -0
  127. play_parser-1.0.0/src/play_parser.egg-info/SOURCES.txt +135 -0
  128. play_parser-1.0.0/src/play_parser.egg-info/dependency_links.txt +1 -0
  129. play_parser-1.0.0/src/play_parser.egg-info/entry_points.txt +2 -0
  130. play_parser-1.0.0/src/play_parser.egg-info/requires.txt +6 -0
  131. play_parser-1.0.0/src/play_parser.egg-info/top_level.txt +1 -0
  132. play_parser-1.0.0/tests/test_document_roundtrip.py +83 -0
  133. play_parser-1.0.0/tests/test_domain.py +90 -0
  134. play_parser-1.0.0/tests/test_ingestor.py +110 -0
  135. play_parser-1.0.0/tests/test_parser_core.py +83 -0
  136. play_parser-1.0.0/tests/test_profiles.py +161 -0
  137. play_parser-1.0.0/tests/test_public_api.py +32 -0
@@ -0,0 +1,16 @@
1
+ # Changelog
2
+
3
+ ## 1.0.0
4
+
5
+ Initial public release.
6
+
7
+ ### Features
8
+
9
+ - Parse raw `.txt` play files into canonical JSON documents.
10
+ - Assemble canonical JSON documents back into normalised play text.
11
+ - Support explicit format profiles for speaker labels and stage directions.
12
+ - Include built-in profiles for common dramatic text layouts.
13
+ - Validate canonical play documents with a shared validator and JSON Schema.
14
+ - Expose a Python API for ingestion, domain access, validation, assembly, and profiles.
15
+ - Provide a `play-parser` CLI for parsing and assembling single files or folders.
16
+ - Include a corpus-backed test suite covering parsing, profiles, ingestion, domain objects, and roundtrips.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Stergios Poularakis
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,5 @@
1
+ include CHANGELOG.md
2
+ include RELEASE.md
3
+ include SECURITY.md
4
+ recursive-include docs *.md *.json
5
+ recursive-include data *.md *.txt *.json
@@ -0,0 +1,195 @@
1
+ Metadata-Version: 2.4
2
+ Name: play-parser
3
+ Version: 1.0.0
4
+ Summary: Parse dramatic play text into ordered dramatic events.
5
+ Author: Stergios Poularakis
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/stpoular/play-parser
8
+ Project-URL: Documentation, https://github.com/stpoular/play-parser/tree/main/docs
9
+ Project-URL: Repository, https://github.com/stpoular/play-parser
10
+ Project-URL: Issues, https://github.com/stpoular/play-parser/issues
11
+ Project-URL: Changelog, https://github.com/stpoular/play-parser/blob/main/CHANGELOG.md
12
+ Keywords: theatre,drama,plays,parser,json
13
+ Classifier: Development Status :: 5 - Production/Stable
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3 :: Only
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Operating System :: OS Independent
22
+ Classifier: Topic :: Text Processing
23
+ Classifier: Typing :: Typed
24
+ Requires-Python: >=3.10
25
+ Description-Content-Type: text/markdown
26
+ License-File: LICENSE
27
+ Provides-Extra: dev
28
+ Requires-Dist: build>=1; extra == "dev"
29
+ Requires-Dist: pytest>=8; extra == "dev"
30
+ Requires-Dist: ruff>=0.8; extra == "dev"
31
+ Requires-Dist: twine>=5; extra == "dev"
32
+ Dynamic: license-file
33
+
34
+ # play-parser
35
+
36
+ `play-parser` parses theatrical play text into a canonical JSON document and assembles canonical documents back into normalised play text.
37
+
38
+ Canonical text uses a stable output format. For example, speech labels are emitted in colon form such as `Hamlet: ...`, even when the source text used another supported layout.
39
+
40
+ ## Features
41
+
42
+ - Parse raw `.txt` play files into structured JSON.
43
+ - Assemble canonical JSON documents into normalised `.txt` output.
44
+ - Read and validate existing canonical `.json` documents.
45
+ - Work with explicit parsing profiles for different source formats.
46
+ - Preserve speeches, stage directions, acts, scenes, metadata, characters, and document statistics.
47
+ - Use the package from Python or through the `play-parser` command line interface.
48
+
49
+ ## Supported inputs
50
+
51
+ - Raw `.txt` play files.
52
+ - Canonical `.json` documents produced by this package.
53
+
54
+ The package does not parse PDFs, DOCX files, HTML pages, scans, images, or audio directly. Convert those sources to text first.
55
+
56
+ ## Installation
57
+
58
+ ```bash
59
+ pip install play-parser
60
+ ```
61
+
62
+ ## Python quick start
63
+
64
+ ```python
65
+ from play_parser import Play, PlayIngestor
66
+
67
+ ingestor = PlayIngestor("Hamlet.txt", profile="colon_inline")
68
+ play = Play(ingestor.data)
69
+
70
+ print(play.title)
71
+ print(play.author)
72
+ print(len(play.acts))
73
+ print(len(play.scenes))
74
+ print(len(play.characters))
75
+ print(len(play.speeches))
76
+
77
+ play.save_json("Hamlet.json")
78
+ play.save_text("Hamlet.canonical.txt")
79
+ ```
80
+
81
+ Parse text that is already in memory:
82
+
83
+ ```python
84
+ from play_parser import PlayIngestor
85
+
86
+ text = "ACT I\n\nSCENE I.\n\nHAMLET: Who's there?"
87
+ ingestor = PlayIngestor.from_text(text, source_name="Hamlet.txt", profile="colon_inline")
88
+ document = ingestor.data
89
+ ```
90
+
91
+ Assemble a canonical document:
92
+
93
+ ```python
94
+ from play_parser import assemble_play_text
95
+
96
+ canonical_text = assemble_play_text(document)
97
+ ```
98
+
99
+ ## Command line usage
100
+
101
+ Show help and version information:
102
+
103
+ ```bash
104
+ play-parser --help
105
+ play-parser --version
106
+ ```
107
+
108
+ Parse one file:
109
+
110
+ ```bash
111
+ play-parser parse Hamlet.txt \
112
+ --profile colon_inline \
113
+ --json-output Hamlet.json \
114
+ --text-output Hamlet.canonical.txt
115
+ ```
116
+
117
+ Parse a folder recursively:
118
+
119
+ ```bash
120
+ play-parser parse \
121
+ --input-root data/text/raw \
122
+ --recursive \
123
+ --profile colon_inline \
124
+ --json-output-root data/json/generated
125
+ ```
126
+
127
+ Assemble canonical JSON files into text:
128
+
129
+ ```bash
130
+ play-parser assemble \
131
+ --input-root data/json/generated \
132
+ --recursive \
133
+ --output-root data/text/canonical
134
+ ```
135
+
136
+ ## Public API
137
+
138
+ Stable top-level imports:
139
+
140
+ ```python
141
+ from play_parser import (
142
+ Play,
143
+ PlayIngestor,
144
+ assemble_play_text,
145
+ get_format_profile,
146
+ list_format_profiles,
147
+ load_format_profile_config,
148
+ load_format_profile_file,
149
+ validate_play_document,
150
+ )
151
+ ```
152
+
153
+ Domain classes such as `Act`, `Scene`, `Speech`, `Character`, `Monologue`, and `Dialogue` are also available from the top-level package.
154
+
155
+ ## Format profiles
156
+
157
+ Built-in profiles are available through `list_format_profiles()` and can be passed to `PlayIngestor` or the CLI by name.
158
+
159
+ ```python
160
+ from play_parser import list_format_profiles
161
+
162
+ print(list_format_profiles())
163
+ ```
164
+
165
+ See [`docs/FORMAT_PROFILES.md`](docs/FORMAT_PROFILES.md) for the profile schema and examples.
166
+
167
+ ## Documentation
168
+
169
+ - [`docs/API.md`](docs/API.md): Python API and CLI profile usage.
170
+ - [`docs/JSON_SCHEMA.md`](docs/JSON_SCHEMA.md): canonical JSON document format.
171
+ - [`docs/FORMAT_PROFILES.md`](docs/FORMAT_PROFILES.md): built-in and custom format profiles.
172
+
173
+ ## Development
174
+
175
+ Install development dependencies:
176
+
177
+ ```bash
178
+ python -m pip install -e .[dev]
179
+ ```
180
+
181
+ Run local checks:
182
+
183
+ ```bash
184
+ python -m ruff check .
185
+ python -m ruff format --check .
186
+ python -m unittest discover -s tests
187
+ python -m build
188
+ python -m twine check dist/*
189
+ ```
190
+
191
+ Release steps are documented in [`RELEASE.md`](RELEASE.md).
192
+
193
+ ## Licence
194
+
195
+ MIT
@@ -0,0 +1,162 @@
1
+ # play-parser
2
+
3
+ `play-parser` parses theatrical play text into a canonical JSON document and assembles canonical documents back into normalised play text.
4
+
5
+ Canonical text uses a stable output format. For example, speech labels are emitted in colon form such as `Hamlet: ...`, even when the source text used another supported layout.
6
+
7
+ ## Features
8
+
9
+ - Parse raw `.txt` play files into structured JSON.
10
+ - Assemble canonical JSON documents into normalised `.txt` output.
11
+ - Read and validate existing canonical `.json` documents.
12
+ - Work with explicit parsing profiles for different source formats.
13
+ - Preserve speeches, stage directions, acts, scenes, metadata, characters, and document statistics.
14
+ - Use the package from Python or through the `play-parser` command line interface.
15
+
16
+ ## Supported inputs
17
+
18
+ - Raw `.txt` play files.
19
+ - Canonical `.json` documents produced by this package.
20
+
21
+ The package does not parse PDFs, DOCX files, HTML pages, scans, images, or audio directly. Convert those sources to text first.
22
+
23
+ ## Installation
24
+
25
+ ```bash
26
+ pip install play-parser
27
+ ```
28
+
29
+ ## Python quick start
30
+
31
+ ```python
32
+ from play_parser import Play, PlayIngestor
33
+
34
+ ingestor = PlayIngestor("Hamlet.txt", profile="colon_inline")
35
+ play = Play(ingestor.data)
36
+
37
+ print(play.title)
38
+ print(play.author)
39
+ print(len(play.acts))
40
+ print(len(play.scenes))
41
+ print(len(play.characters))
42
+ print(len(play.speeches))
43
+
44
+ play.save_json("Hamlet.json")
45
+ play.save_text("Hamlet.canonical.txt")
46
+ ```
47
+
48
+ Parse text that is already in memory:
49
+
50
+ ```python
51
+ from play_parser import PlayIngestor
52
+
53
+ text = "ACT I\n\nSCENE I.\n\nHAMLET: Who's there?"
54
+ ingestor = PlayIngestor.from_text(text, source_name="Hamlet.txt", profile="colon_inline")
55
+ document = ingestor.data
56
+ ```
57
+
58
+ Assemble a canonical document:
59
+
60
+ ```python
61
+ from play_parser import assemble_play_text
62
+
63
+ canonical_text = assemble_play_text(document)
64
+ ```
65
+
66
+ ## Command line usage
67
+
68
+ Show help and version information:
69
+
70
+ ```bash
71
+ play-parser --help
72
+ play-parser --version
73
+ ```
74
+
75
+ Parse one file:
76
+
77
+ ```bash
78
+ play-parser parse Hamlet.txt \
79
+ --profile colon_inline \
80
+ --json-output Hamlet.json \
81
+ --text-output Hamlet.canonical.txt
82
+ ```
83
+
84
+ Parse a folder recursively:
85
+
86
+ ```bash
87
+ play-parser parse \
88
+ --input-root data/text/raw \
89
+ --recursive \
90
+ --profile colon_inline \
91
+ --json-output-root data/json/generated
92
+ ```
93
+
94
+ Assemble canonical JSON files into text:
95
+
96
+ ```bash
97
+ play-parser assemble \
98
+ --input-root data/json/generated \
99
+ --recursive \
100
+ --output-root data/text/canonical
101
+ ```
102
+
103
+ ## Public API
104
+
105
+ Stable top-level imports:
106
+
107
+ ```python
108
+ from play_parser import (
109
+ Play,
110
+ PlayIngestor,
111
+ assemble_play_text,
112
+ get_format_profile,
113
+ list_format_profiles,
114
+ load_format_profile_config,
115
+ load_format_profile_file,
116
+ validate_play_document,
117
+ )
118
+ ```
119
+
120
+ Domain classes such as `Act`, `Scene`, `Speech`, `Character`, `Monologue`, and `Dialogue` are also available from the top-level package.
121
+
122
+ ## Format profiles
123
+
124
+ Built-in profiles are available through `list_format_profiles()` and can be passed to `PlayIngestor` or the CLI by name.
125
+
126
+ ```python
127
+ from play_parser import list_format_profiles
128
+
129
+ print(list_format_profiles())
130
+ ```
131
+
132
+ See [`docs/FORMAT_PROFILES.md`](https://github.com/stpoular/play-parser/blob/main/docs/FORMAT_PROFILES.md) for the profile schema and examples.
133
+
134
+ ## Documentation
135
+
136
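+ - [`docs/API.md`](https://github.com/stpoular/play-parser/blob/main/docs/API.md): Python API and CLI profile usage.
136
+ - [`docs/JSON_SCHEMA.md`](https://github.com/stpoular/play-parser/blob/main/docs/JSON_SCHEMA.md): canonical JSON document format.
137
+ - [`docs/FORMAT_PROFILES.md`](https://github.com/stpoular/play-parser/blob/main/docs/FORMAT_PROFILES.md): built-in and custom format profiles.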
139
+
140
+ ## Development
141
+
142
+ Install development dependencies:
143
+
144
+ ```bash
145
+ python -m pip install -e ".[dev]"
146
+ ```
147
+
148
+ Run local checks:
149
+
150
+ ```bash
151
+ python -m ruff check .
152
+ python -m ruff format --check .
153
+ python -m unittest discover -s tests
154
+ python -m build
155
+ python -m twine check dist/*
156
+ ```
157
+
158
+ Release steps are documented in [`RELEASE.md`](https://github.com/stpoular/play-parser/blob/main/RELEASE.md).
159
+
160
+ ## Licence
161
+
162
+ MIT
@@ -0,0 +1,62 @@
1
+ # Release checklist
2
+
3
+ Use this checklist when publishing a new public release.
4
+
5
+ ## 1. Prepare the repository
6
+
7
+ ```bash
8
+ python -m pip install -e ".[dev]"
9
+ python -m ruff check .
10
+ python -m ruff format --check .
11
+ python -m unittest discover -s tests
12
+ ```
13
+
14
+ Check that the version has been updated in both `pyproject.toml` and `src/play_parser/__init__.py`, and that the two values match. For the initial `1.0.0` release, keep `CHANGELOG.md` simple: list only the features of that release, or leave it without any historical entries.
15
+
16
+ ## 2. Build and inspect distributions
17
+
18
+ ```bash
19
+ rm -rf dist build src/*.egg-info
20
+ python -m build
21
+ python -m twine check dist/*
22
+ ```
23
+
24
+ Optional local wheel smoke test:
25
+
26
+ ```bash
27
+ python -m venv /tmp/play-parser-release-test
28
+ /tmp/play-parser-release-test/bin/python -m pip install dist/*.whl
29
+ /tmp/play-parser-release-test/bin/play-parser --version
30
+ /tmp/play-parser-release-test/bin/play-parser --help
31
+ ```
32
+
33
+ ## 3. Publish to TestPyPI first
34
+
35
+ ```bash
36
+ python -m twine upload --repository testpypi dist/*
37
+ ```
38
+
39
+ Install from TestPyPI in a fresh environment and check the CLI:
40
+
41
+ ```bash
42
+ python -m venv /tmp/play-parser-testpypi
43
+ /tmp/play-parser-testpypi/bin/python -m pip install \
44
+ --index-url https://test.pypi.org/simple/ \
45
+ --extra-index-url https://pypi.org/simple/ \
46
+ play-parser
47
+ /tmp/play-parser-testpypi/bin/play-parser --version
48
+ /tmp/play-parser-testpypi/bin/play-parser --help
49
+ ```
50
+
51
+ ## 4. Publish to PyPI
52
+
53
+ ```bash
54
+ python -m twine upload dist/*
55
+ ```
56
+
57
+ ## 5. Tag the release
58
+
59
+ ```bash
60
+ git tag v1.0.0
61
+ git push origin v1.0.0
62
+ ```
@@ -0,0 +1,11 @@
1
+ # Security Policy
2
+
3
+ ## Supported versions
4
+
5
+ Security fixes are considered for the latest released version of `play-parser`.
6
+
7
+ ## Reporting a vulnerability
8
+
9
+ Please report security issues privately by email to the project maintainer rather than opening a public issue on the GitHub issue tracker.
10
+
11
+ Include a clear description, reproduction steps where possible, and the version affected. Please do not disclose the issue publicly until it has been assessed.
@@ -0,0 +1,7 @@
1
+ # Test corpus data
2
+
3
+ The files in this directory are used as test fixtures and reference corpus material for parser development.
4
+
5
+ The raw play texts are public-domain source texts. The canonical JSON files are generated or curated fixtures used to validate parser behaviour.
6
+
7
+ These files are not required for normal package use.