MarkdownHeaderTextSplitter 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Wenxi Wang
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,74 @@
1
+ Metadata-Version: 2.4
2
+ Name: MarkdownHeaderTextSplitter
3
+ Version: 0.1.0
4
+ Summary: A heading-aware Markdown splitter with a simple local UI
5
+ Author: Wenxi Wang
6
+ License: MIT
7
+ Project-URL: Homepage, https://example.com/MarkdownHeaderTextSplitter
8
+ Project-URL: Repository, https://example.com/MarkdownHeaderTextSplitter
9
+ Keywords: markdown,splitter,rag,chunking,ui
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Text Processing :: Markup :: Markdown
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Requires-Python: >=3.9
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: streamlit>=1.32
25
+ Dynamic: license-file
26
+
27
+ # MarkdownHeaderTextSplitter
28
+
29
+ A small, original, heading-aware Markdown splitter with a simple local UI for preparing Markdown files for RAG or inspection.
30
+
31
+ ## What it does
32
+
33
+ - Accepts a `.md` file in a local UI
34
+ - Respects Markdown heading structure like `#`, `##`, and `###`
35
+ - Lets you choose how many chunks you want
36
+ - Outputs the split Markdown chunks as separate `.md` files inside a downloadable `.zip`
37
+ - Keeps the implementation lightweight and original instead of copying third-party splitter code
38
+
39
+ ## Install
40
+
41
+ ```bash
42
+ pip install MarkdownHeaderTextSplitter
43
+ ```
44
+
45
+ ## Run
46
+
47
+ ```bash
48
+ MarkdownHeaderTextSplitter
49
+ ```
50
+
51
+ This launches a local Streamlit app in your browser.
52
+
53
+ ## Notes
54
+
55
+ - This package does **not** vendor or copy code from LangChain or other companies.
56
+ - The idea of heading-aware splitting is common, but you should still do your own name, trademark, licensing, and patent review before publishing publicly.
57
+ - PyPI package-name availability can change over time, so confirm the final project name before upload.
58
+
59
+ ## File structure
60
+
61
+ ```text
62
+ MarkdownHeaderTextSplitter_pypi/
63
+ README.md
64
+ LICENSE
65
+ pyproject.toml
66
+ markdownheadertextsplitter/
67
+ __init__.py
68
+ app.py
69
+ cli.py
70
+ core.py
71
+ dist/
72
+ MarkdownHeaderTextSplitter-0.1.0.tar.gz
73
+ markdownheadertextsplitter-0.1.0-py3-none-any.whl
74
+ ```
@@ -0,0 +1,13 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ MarkdownHeaderTextSplitter.egg-info/PKG-INFO
5
+ MarkdownHeaderTextSplitter.egg-info/SOURCES.txt
6
+ MarkdownHeaderTextSplitter.egg-info/dependency_links.txt
7
+ MarkdownHeaderTextSplitter.egg-info/entry_points.txt
8
+ MarkdownHeaderTextSplitter.egg-info/requires.txt
9
+ MarkdownHeaderTextSplitter.egg-info/top_level.txt
10
+ markdownheadertextsplitter/__init__.py
11
+ markdownheadertextsplitter/app.py
12
+ markdownheadertextsplitter/cli.py
13
+ markdownheadertextsplitter/core.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ MarkdownHeaderTextSplitter = markdownheadertextsplitter.cli:main
@@ -0,0 +1 @@
1
+ markdownheadertextsplitter
@@ -0,0 +1,74 @@
1
+ Metadata-Version: 2.4
2
+ Name: MarkdownHeaderTextSplitter
3
+ Version: 0.1.0
4
+ Summary: A heading-aware Markdown splitter with a simple local UI
5
+ Author: Wenxi Wang
6
+ License: MIT
7
+ Project-URL: Homepage, https://example.com/MarkdownHeaderTextSplitter
8
+ Project-URL: Repository, https://example.com/MarkdownHeaderTextSplitter
9
+ Keywords: markdown,splitter,rag,chunking,ui
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Text Processing :: Markup :: Markdown
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Requires-Python: >=3.9
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: streamlit>=1.32
25
+ Dynamic: license-file
26
+
27
+ # MarkdownHeaderTextSplitter
28
+
29
+ A small, original, heading-aware Markdown splitter with a simple local UI for preparing Markdown files for RAG or inspection.
30
+
31
+ ## What it does
32
+
33
+ - Accepts a `.md` file in a local UI
34
+ - Respects Markdown heading structure like `#`, `##`, and `###`
35
+ - Lets you choose how many chunks you want
36
+ - Outputs the split Markdown chunks as separate `.md` files inside a downloadable `.zip`
37
+ - Keeps the implementation lightweight and original instead of copying third-party splitter code
38
+
39
+ ## Install
40
+
41
+ ```bash
42
+ pip install MarkdownHeaderTextSplitter
43
+ ```
44
+
45
+ ## Run
46
+
47
+ ```bash
48
+ MarkdownHeaderTextSplitter
49
+ ```
50
+
51
+ This launches a local Streamlit app in your browser.
52
+
53
+ ## Notes
54
+
55
+ - This package does **not** vendor or copy code from LangChain or other companies.
56
+ - The idea of heading-aware splitting is common, but you should still do your own name, trademark, licensing, and patent review before publishing publicly.
57
+ - PyPI package-name availability can change over time, so confirm the final project name before upload.
58
+
59
+ ## File structure
60
+
61
+ ```text
62
+ MarkdownHeaderTextSplitter_pypi/
63
+ README.md
64
+ LICENSE
65
+ pyproject.toml
66
+ markdownheadertextsplitter/
67
+ __init__.py
68
+ app.py
69
+ cli.py
70
+ core.py
71
+ dist/
72
+ MarkdownHeaderTextSplitter-0.1.0.tar.gz
73
+ markdownheadertextsplitter-0.1.0-py3-none-any.whl
74
+ ```
@@ -0,0 +1,48 @@
1
+ # MarkdownHeaderTextSplitter
2
+
3
+ A small, original, heading-aware Markdown splitter with a simple local UI for preparing Markdown files for RAG or inspection.
4
+
5
+ ## What it does
6
+
7
+ - Accepts a `.md` file in a local UI
8
+ - Respects Markdown heading structure like `#`, `##`, and `###`
9
+ - Lets you choose how many chunks you want
10
+ - Outputs the split Markdown chunks as separate `.md` files inside a downloadable `.zip`
11
+ - Keeps the implementation lightweight and original instead of copying third-party splitter code
12
+
13
+ ## Install
14
+
15
+ ```bash
16
+ pip install MarkdownHeaderTextSplitter
17
+ ```
18
+
19
+ ## Run
20
+
21
+ ```bash
22
+ MarkdownHeaderTextSplitter
23
+ ```
24
+
25
+ This launches a local Streamlit app in your browser.
26
+
27
+ ## Notes
28
+
29
+ - This package does **not** vendor or copy code from LangChain or other companies.
30
+ - The idea of heading-aware splitting is common, but you should still do your own name, trademark, licensing, and patent review before publishing publicly.
31
+ - PyPI package-name availability can change over time, so confirm the final project name before upload.
32
+
33
+ ## File structure
34
+
35
+ ```text
36
+ MarkdownHeaderTextSplitter_pypi/
37
+ README.md
38
+ LICENSE
39
+ pyproject.toml
40
+ markdownheadertextsplitter/
41
+ __init__.py
42
+ app.py
43
+ cli.py
44
+ core.py
45
+ dist/
46
+ MarkdownHeaderTextSplitter-0.1.0.tar.gz
47
+ markdownheadertextsplitter-0.1.0-py3-none-any.whl
48
+ ```
@@ -0,0 +1,17 @@
1
+ """MarkdownHeaderTextSplitter package."""
2
+
3
+ __all__ = [
4
+ "parse_markdown_sections",
5
+ "split_markdown_into_chunks",
6
+ "chunk_manifest",
7
+ "build_zip_bytes",
8
+ ]
9
+
10
+ __version__ = "0.1.0"
11
+
12
+ from .core import ( # noqa: E402
13
+ build_zip_bytes,
14
+ chunk_manifest,
15
+ parse_markdown_sections,
16
+ split_markdown_into_chunks,
17
+ )
@@ -0,0 +1,68 @@
1
+ from __future__ import annotations
2
+
3
+ import streamlit as st
4
+
5
+ from markdownheadertextsplitter.core import build_zip_bytes, chunk_manifest, split_markdown_into_chunks
6
+
7
+
8
# Streamlit entry script: upload a Markdown file, split it into heading-aware
# chunks, then preview the result and offer it as a .zip download.
st.set_page_config(
    page_title="MarkdownHeaderTextSplitter",
    page_icon="🧩",
    layout="wide",
)

st.title("MarkdownHeaderTextSplitter")
st.caption("Split Markdown into smaller heading-aware chunks for RAG, review, or preprocessing.")

# All splitter options live in the sidebar so the main area stays free for results.
with st.sidebar:
    st.header("Settings")
    target_chunks = st.slider("Target number of chunks", min_value=1, max_value=40, value=6)
    max_heading_level = st.select_slider("Respect headings through level", options=[1, 2, 3, 4, 5, 6], value=3)
    include_preamble = st.checkbox("Keep preamble before first heading", value=True)
    hard_max_chars = st.number_input(
        "Safety cap: max characters per chunk (0 = disabled)",
        min_value=0,
        max_value=50000,
        value=0,
        step=100,
    )

uploaded = st.file_uploader("Drag a Markdown file here", type=["md"])

# Nothing to do until a file is provided; st.stop() ends this script run.
if uploaded is None:
    st.info("Upload a .md file to preview and export split chunks.")
    st.stop()

raw_bytes = uploaded.read()
# "utf-8-sig" decodes exactly like "utf-8" but also drops a leading byte-order
# mark. With plain "utf-8" the BOM stays glued to the first line, so a heading
# on line 1 is not recognised by the splitter and ends up treated as preamble.
text = raw_bytes.decode("utf-8-sig", errors="ignore")

chunks = split_markdown_into_chunks(
    text,
    target_chunks=target_chunks,
    max_heading_level=max_heading_level,
    include_preamble=include_preamble,
    hard_max_chars=hard_max_chars,
)
manifest = chunk_manifest(chunks, uploaded.name)
zip_bytes = build_zip_bytes(chunks, uploaded.name)

col1, col2 = st.columns([1, 2])
with col1:
    st.metric("Output chunks", len(chunks))
    st.metric("Input characters", len(text))
    # max(1, ...) guards the division when the split produced no chunks.
    st.metric("Avg chunk characters", int(sum(len(c) for c in chunks) / max(1, len(chunks))))
    st.download_button(
        label="Download split .zip",
        data=zip_bytes,
        file_name=f"{uploaded.name.rsplit('.', 1)[0]}_chunks.zip",
        mime="application/zip",
    )

with col2:
    st.subheader("Chunk manifest")
    st.dataframe(manifest, use_container_width=True)

st.subheader("Chunk preview")
for item, chunk in zip(manifest, chunks):
    with st.expander(f"{item['file_name']} — {item['char_count']} chars"):
        st.code(chunk, language="markdown")
@@ -0,0 +1,17 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ import sys
5
+
6
+
7
def main() -> None:
    """Console-script entry point: run the bundled ``app.py`` under Streamlit.

    Exits with an explanatory message when Streamlit cannot be imported.
    Otherwise ``sys.argv`` is rewritten to ``streamlit run <app.py>`` and
    control is handed to Streamlit's own command-line entry point, whose
    return value becomes the process exit status.
    """
    try:
        from streamlit.web import cli as streamlit_cli
    except Exception as import_error:  # pragma: no cover
        message = "Streamlit is required to launch the UI. Install the package dependencies first."
        raise SystemExit(message) from import_error

    # app.py ships next to this module inside the installed package.
    script_path = Path(__file__).with_name("app.py")
    sys.argv = ["streamlit", "run", str(script_path)]
    exit_status = streamlit_cli.main()
    raise SystemExit(exit_status)
@@ -0,0 +1,258 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from io import BytesIO
5
+ import json
6
+ import math
7
+ import re
8
+ import zipfile
9
+ from pathlib import Path
10
+ from typing import Iterable, List
11
+
12
# Matches one ATX-style heading line.
#   ^\\?       an optional literal backslash before the hashes
#   (#{1,6})   group 1: the run of 1-6 '#' characters (its length is the level)
#   \s+        at least one whitespace character after the hashes
#   (.*\S)     group 2: the heading text, ending on a non-whitespace character
#   \s*$       optional trailing whitespace up to the end of the line
HEADING_RE = re.compile(r"^\\?(#{1,6})\s+(.*\S)\s*$")
13
+
14
+
15
@dataclass
class Section:
    """A contiguous slice of a Markdown document together with its heading info."""

    # Number of '#' characters of the heading that opened the section;
    # the parser uses 0 for text that is not introduced by a heading.
    heading_level: int
    # Heading title, or a placeholder label when there is no heading.
    heading_text: str
    # Full section text (the parser keeps the heading line inside it).
    content: str

    @property
    def char_count(self) -> int:
        """Return the length of ``content`` in characters."""
        body = self.content
        return len(body)
24
+
25
+
26
def _normalize_newlines(text: str) -> str:
    """Return *text* with CRLF and bare CR line endings rewritten as LF."""
    # Collapse Windows pairs first so their CR is not converted a second time.
    without_crlf = text.replace("\r\n", "\n")
    # Any CR still present is a lone (old-Mac style) line ending.
    return "\n".join(without_crlf.split("\r"))
28
+
29
+
30
def parse_markdown_sections(
    text: str,
    *,
    max_heading_level: int = 3,
    include_preamble: bool = True,
) -> List[Section]:
    """Split a markdown document into ordered sections based on headings.

    The implementation is intentionally simple and original:
    - Headings up to ``max_heading_level`` start a new section.
    - The heading line is kept inside the section content.
    - Optional preamble text before the first heading becomes its own section.
    - Lines inside fenced code blocks (``` or ~~~) never start a section, so
      ``# comment`` lines in code samples are not mistaken for headings.

    Args:
        text: Markdown source. Line endings are normalised and leading and
            trailing newlines are removed before parsing.
        max_heading_level: Deepest heading level (1-6) that opens a new
            section; deeper headings stay inside the current section.
        include_preamble: Whether text before the first qualifying heading is
            emitted as a section titled ``"Preamble"`` with level 0.

    Returns:
        The sections in document order. An empty input yields ``[]``. When no
        section was produced at all (no qualifying heading and the preamble
        is excluded), the whole text is returned as one ``"Document"`` section.
    """
    text = _normalize_newlines(text).strip("\n")
    if not text:
        return []

    # Fence line: up to three spaces of indent, then at least three backticks
    # or tildes (group 1), then the rest of the line (group 2).
    fence_re = re.compile(r"^ {0,3}(`{3,}|~{3,})(.*)$")

    sections: List[Section] = []
    current_lines: List[str] = []
    current_level = 0
    current_title = "Preamble"
    saw_heading = False
    open_fence = ""  # marker of the fence we are inside, "" when outside

    def emit(lines: List[str], level: int, title: str) -> None:
        sections.append(
            Section(
                heading_level=level,
                heading_text=title,
                content="\n".join(lines).strip() + "\n",
            )
        )

    for line in text.split("\n"):
        fence = fence_re.match(line)

        if open_fence:
            # Inside a code block: only a matching closing fence ends it. The
            # closer uses the same character, is at least as long as the
            # opener, and carries no trailing text.
            if (
                fence
                and fence.group(1)[0] == open_fence[0]
                and len(fence.group(1)) >= len(open_fence)
                and not fence.group(2).strip()
            ):
                open_fence = ""
            current_lines.append(line)
            continue

        if fence:
            marker, info = fence.groups()
            # A backtick fence whose info string contains a backtick is inline
            # code rather than a fence opener.
            if not (marker[0] == "`" and "`" in info):
                open_fence = marker
            current_lines.append(line)
            continue

        match = HEADING_RE.match(line)
        if match:
            level = len(match.group(1))
            title = match.group(2).strip()
            if level <= max_heading_level:
                # Close the running section; a preamble is only kept on request.
                if current_lines and (include_preamble or saw_heading):
                    emit(current_lines, current_level, current_title)
                current_lines = [line]
                current_level = level
                current_title = title
                saw_heading = True
                continue
        current_lines.append(line)

    if current_lines and (include_preamble or saw_heading):
        emit(current_lines, current_level, current_title)

    if not sections:
        return [Section(heading_level=0, heading_text="Document", content=text + "\n")]
    return sections
86
+
87
+
88
def _split_text_by_paragraphs(text: str) -> List[str]:
    """Split *text* on blank lines into paragraphs.

    Each returned paragraph is stripped and re-terminated with a blank line
    (``"\\n\\n"``). When the text contains no non-blank paragraph, the original
    text is returned unchanged as the only element.
    """
    paragraphs = []
    for raw in re.split(r"\n\s*\n", text):
        cleaned = raw.strip()
        if cleaned:
            paragraphs.append(cleaned + "\n\n")
    return paragraphs or [text]


def _split_text_by_size(text: str, target_parts: int) -> List[str]:
    """Split *text* into at most ``target_parts`` pieces of similar size.

    Paragraph boundaries are preferred: when the text has at least
    ``target_parts`` paragraphs they are packed, in order and without dropping
    any, into exactly ``target_parts`` pieces. Otherwise the stripped text is
    cut into equal-width character slices.

    Args:
        text: The text to split.
        target_parts: Desired number of pieces.

    Returns:
        The non-blank pieces, each stripped and terminated with one newline.
        When ``target_parts <= 1`` or the text is shorter than two characters,
        the text is returned untouched as a single element.
    """
    if target_parts <= 1 or len(text) < 2:
        return [text]

    paragraphs = _split_text_by_paragraphs(text)
    if len(paragraphs) >= target_parts:
        total_chars = sum(len(p) for p in paragraphs)
        desired = max(1, math.ceil(total_chars / target_parts))
        out: List[str] = []
        bucket: List[str] = []
        bucket_size = 0

        # Iterate by index; the list is never mutated while being walked, so
        # every paragraph ends up in exactly one piece.
        for index, para in enumerate(paragraphs):
            bucket.append(para)
            bucket_size += len(para)

            parts_after_current = target_parts - len(out) - 1
            if parts_after_current <= 0:
                # The final piece absorbs everything that is left.
                continue

            paragraphs_left = len(paragraphs) - index - 1
            # Close when the piece is big enough, or when each remaining
            # paragraph is needed to give the remaining pieces one each.
            if paragraphs_left <= parts_after_current or bucket_size >= desired:
                out.append("".join(bucket).strip() + "\n")
                bucket = []
                bucket_size = 0

        if bucket:
            out.append("".join(bucket).strip() + "\n")
        return [chunk for chunk in out if chunk.strip()]

    # Fallback: raw char slicing if there are not enough paragraphs.
    text = text.strip()
    step = max(1, math.ceil(len(text) / target_parts))
    out = [text[start : start + step].strip() + "\n" for start in range(0, len(text), step)]
    return [chunk for chunk in out if chunk.strip()]
135
+
136
+
137
def _merge_sections_to_target(sections: List[Section], target_chunks: int) -> List[str]:
    """Merge consecutive sections into at most ``target_chunks`` chunks.

    Sections are packed in document order into size-balanced buckets. A bucket
    is closed once it reaches the average target size, or earlier when every
    remaining section is needed to give each remaining bucket one section.
    The last bucket takes whatever is left, so the result never exceeds
    ``target_chunks`` chunks.

    Args:
        sections: Parsed sections in document order. Sections whose content is
            blank are ignored.
        target_chunks: Desired number of output chunks.

    Returns:
        Chunk texts, each ending with one newline. Sections merged into the
        same chunk are separated by a blank line. Returns ``[]`` when no
        section has content.
    """
    bodies = [
        (section.content.strip(), section.char_count)
        for section in sections
        if section.content.strip()
    ]
    if not bodies:
        return []
    if target_chunks <= 1 or len(bodies) == 1:
        return ["\n\n".join(body for body, _ in bodies) + "\n"]

    total_chars = sum(size for _, size in bodies)
    desired = max(1, math.ceil(total_chars / target_chunks))

    chunks: List[str] = []
    bucket: List[str] = []
    bucket_chars = 0

    for index, (body, size) in enumerate(bodies):
        bucket.append(body)
        bucket_chars += size

        buckets_after_current = target_chunks - len(chunks) - 1
        if buckets_after_current <= 0:
            # Final bucket: keep collecting until the sections run out.
            continue

        sections_left = len(bodies) - index - 1
        # Close when big enough, or when the remaining sections are only just
        # enough to fill the remaining buckets.
        if sections_left <= buckets_after_current or bucket_chars >= desired:
            chunks.append("\n\n".join(bucket) + "\n")
            bucket = []
            bucket_chars = 0

    if bucket:
        chunks.append("\n\n".join(bucket) + "\n")

    return chunks
169
+
170
+
171
def _expand_sections_to_target(sections: List[Section], target_chunks: int) -> List[str]:
    """Grow the chunk list towards ``target_chunks`` by halving the largest chunk.

    Starts with one chunk per non-blank section. While there are fewer chunks
    than requested, the longest chunk is split in two via
    ``_split_text_by_size``. Splitting stops early when the longest chunk is
    shorter than 400 characters or cannot be split any further.
    """
    pieces = []
    for section in sections:
        body = section.content.strip()
        if body:
            pieces.append(body + "\n")
    if not pieces:
        return []

    while len(pieces) < target_chunks:
        # max() returns the first of equally long chunks.
        position, largest = max(enumerate(pieces), key=lambda pair: len(pair[1]))
        if len(largest) < 400:
            break
        halves = _split_text_by_size(largest, 2)
        if len(halves) <= 1:
            break
        pieces = pieces[:position] + halves + pieces[position + 1 :]

    return pieces
187
+
188
+
189
def _apply_hard_max_chars(chunks: Iterable[str], hard_max_chars: int) -> List[str]:
    """Re-split chunks that are longer than ``hard_max_chars``.

    Every chunk is normalised to stripped text plus one trailing newline. A
    chunk above the cap is divided with ``_split_text_by_size``, and any
    resulting part that is still above the cap is divided again. One
    paragraph-based pass can leave an oversized part behind, for example a
    single very long paragraph next to short ones. A part is emitted as-is
    only when it cannot be made any smaller.

    Args:
        chunks: Chunk texts in output order.
        hard_max_chars: Maximum characters per chunk; ``0`` or a negative
            value disables the cap.

    Returns:
        The chunks in their original order with oversized ones replaced by
        their parts.
    """
    out: List[str] = []
    for chunk in chunks:
        # Stack of pieces still to be checked; the top is the next in order.
        pending = [chunk.strip() + "\n"]
        while pending:
            piece = pending.pop()
            if hard_max_chars <= 0 or len(piece) <= hard_max_chars:
                out.append(piece)
                continue

            wanted = max(2, math.ceil(len(piece) / hard_max_chars))
            parts = [part.strip() + "\n" for part in _split_text_by_size(piece, wanted) if part.strip()]
            made_progress = len(parts) > 1 and all(len(part) < len(piece) for part in parts)
            if not made_progress:
                # Cannot be reduced further; emit what we have rather than loop.
                out.extend(parts)
                continue
            # Push in reverse so the parts are popped in document order.
            pending.extend(reversed(parts))
    return out
198
+
199
+
200
def split_markdown_into_chunks(
    text: str,
    *,
    target_chunks: int = 6,
    max_heading_level: int = 3,
    include_preamble: bool = True,
    hard_max_chars: int = 0,
) -> List[str]:
    """Split Markdown *text* into roughly ``target_chunks`` heading-aware chunks.

    The text is first parsed into sections. Depending on how the section count
    compares with ``target_chunks``, the sections are joined into one chunk,
    used as they are, merged, or expanded. The optional ``hard_max_chars`` cap
    is applied last.

    Args:
        text: Markdown source.
        target_chunks: Desired number of chunks; values of 1 or less yield a
            single chunk containing every section.
        max_heading_level: Deepest heading level that starts a new section.
        include_preamble: Whether text before the first heading is kept.
        hard_max_chars: Character cap handed to ``_apply_hard_max_chars``.

    Returns:
        The non-blank chunks in document order; ``[]`` when parsing yields no
        sections.
    """
    parsed = parse_markdown_sections(
        text,
        max_heading_level=max_heading_level,
        include_preamble=include_preamble,
    )
    if not parsed:
        return []

    section_count = len(parsed)
    if target_chunks <= 1:
        joined = "\n\n".join(section.content.strip() for section in parsed)
        pieces = [joined.strip() + "\n"]
    elif section_count > target_chunks:
        pieces = _merge_sections_to_target(parsed, target_chunks)
    elif section_count < target_chunks:
        pieces = _expand_sections_to_target(parsed, target_chunks)
    else:
        # One section per requested chunk: use the sections as they are.
        pieces = [section.content.strip() + "\n" for section in parsed]

    capped = _apply_hard_max_chars(pieces, hard_max_chars)
    return [piece for piece in capped if piece.strip()]
227
+
228
+
229
def chunk_manifest(chunks: Iterable[str], source_name: str) -> list[dict]:
    """Describe each chunk for display and for the exported manifest file.

    Args:
        chunks: Chunk texts in output order.
        source_name: Name of the source file. Its stem prefixes every chunk
            file name; ``"markdown"`` is used when the stem is empty, matching
            the manifest file name chosen by ``build_zip_bytes``.

    Returns:
        One dict per chunk with the keys ``chunk_index`` (1-based),
        ``file_name``, ``char_count``, ``line_count`` and ``preview`` (the
        first 180 characters with newlines turned into spaces).
    """
    stem = Path(source_name).stem or "markdown"
    manifest = []
    for index, chunk in enumerate(chunks, start=1):
        # Every newline ends one line; a final line without a trailing newline
        # still counts. This avoids reporting one line too many for chunks
        # that end with "\n".
        line_count = chunk.count("\n")
        if chunk and not chunk.endswith("\n"):
            line_count += 1
        manifest.append(
            {
                "chunk_index": index,
                "file_name": f"{stem}_chunk_{index:03d}.md",
                "char_count": len(chunk),
                "line_count": line_count,
                "preview": chunk[:180].replace("\n", " ").strip(),
            }
        )
    return manifest
243
+
244
+
245
def build_zip_bytes(chunks: Iterable[str], source_name: str) -> bytes:
    """Pack the chunks and a JSON manifest into an in-memory zip archive.

    Each chunk is stored under the file name assigned by ``chunk_manifest``.
    The manifest itself is stored as ``<stem>_manifest.json``, where the stem
    comes from ``source_name`` and falls back to ``"markdown"`` when empty.

    Returns:
        The bytes of the finished, deflate-compressed archive.
    """
    # Materialise once: the chunks are walked both for the manifest and for writing.
    chunk_list = list(chunks)
    entries = chunk_manifest(chunk_list, source_name)
    base_name = Path(source_name).stem or "markdown"
    manifest_json = json.dumps(entries, indent=2, ensure_ascii=False)

    archive = BytesIO()
    with zipfile.ZipFile(archive, mode="w", compression=zipfile.ZIP_DEFLATED) as bundle:
        for entry, body in zip(entries, chunk_list):
            bundle.writestr(entry["file_name"], body)
        bundle.writestr(f"{base_name}_manifest.json", manifest_json)
    return archive.getvalue()
@@ -0,0 +1,36 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "MarkdownHeaderTextSplitter"
7
+ version = "0.1.0"
8
+ description = "A heading-aware Markdown splitter with a simple local UI"
9
+ authors = [{name="Wenxi Wang"}]
10
+ readme = "README.md"
11
+ requires-python = ">=3.9"
12
+ license = {text = "MIT"}
13
+ dependencies = [
14
+ "streamlit>=1.32",
15
+ ]
16
+ keywords = ["markdown", "splitter", "rag", "chunking", "ui"]
17
+ classifiers = [
18
+ "Development Status :: 3 - Alpha",
19
+ "Intended Audience :: Developers",
20
+ "License :: OSI Approved :: MIT License",
21
+ "Operating System :: OS Independent",
22
+ "Programming Language :: Python :: 3",
23
+ "Programming Language :: Python :: 3.9",
24
+ "Programming Language :: Python :: 3.10",
25
+ "Programming Language :: Python :: 3.11",
26
+ "Programming Language :: Python :: 3.12",
27
+ "Topic :: Text Processing :: Markup :: Markdown",
28
+ "Topic :: Software Development :: Libraries :: Python Modules",
29
+ ]
30
+
31
+ [project.urls]
32
+ Homepage = "https://example.com/MarkdownHeaderTextSplitter"
33
+ Repository = "https://example.com/MarkdownHeaderTextSplitter"
34
+
35
+ [project.scripts]
36
+ MarkdownHeaderTextSplitter = "markdownheadertextsplitter.cli:main"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+