jostack-mdparse 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. jostack_mdparse-0.1.0/.editorconfig +18 -0
  2. jostack_mdparse-0.1.0/.gitattributes +5 -0
  3. jostack_mdparse-0.1.0/.github/ISSUE_TEMPLATE/bug_report.yml +39 -0
  4. jostack_mdparse-0.1.0/.github/ISSUE_TEMPLATE/config.yml +5 -0
  5. jostack_mdparse-0.1.0/.github/ISSUE_TEMPLATE/feature_request.yml +23 -0
  6. jostack_mdparse-0.1.0/.github/PULL_REQUEST_TEMPLATE.md +17 -0
  7. jostack_mdparse-0.1.0/.github/dependabot.yml +17 -0
  8. jostack_mdparse-0.1.0/.github/release.yml +14 -0
  9. jostack_mdparse-0.1.0/.github/workflows/ci.yml +52 -0
  10. jostack_mdparse-0.1.0/.github/workflows/claude-code-review.yml +29 -0
  11. jostack_mdparse-0.1.0/.github/workflows/codeql.yml +23 -0
  12. jostack_mdparse-0.1.0/.github/workflows/notify-downstream.yml +24 -0
  13. jostack_mdparse-0.1.0/.github/workflows/release.yml +58 -0
  14. jostack_mdparse-0.1.0/.gitignore +207 -0
  15. jostack_mdparse-0.1.0/CHANGELOG.md +18 -0
  16. jostack_mdparse-0.1.0/CLAUDE.md +32 -0
  17. jostack_mdparse-0.1.0/CODE_OF_CONDUCT.md +18 -0
  18. jostack_mdparse-0.1.0/CONTRIBUTING.md +33 -0
  19. jostack_mdparse-0.1.0/LICENSE +190 -0
  20. jostack_mdparse-0.1.0/Makefile +20 -0
  21. jostack_mdparse-0.1.0/PKG-INFO +124 -0
  22. jostack_mdparse-0.1.0/README.md +103 -0
  23. jostack_mdparse-0.1.0/SECURITY.md +10 -0
  24. jostack_mdparse-0.1.0/options.json +100 -0
  25. jostack_mdparse-0.1.0/package.json +10 -0
  26. jostack_mdparse-0.1.0/pyproject.toml +55 -0
  27. jostack_mdparse-0.1.0/scripts/generate-langchain.mjs +284 -0
  28. jostack_mdparse-0.1.0/scripts/generate-options.mjs +102 -0
  29. jostack_mdparse-0.1.0/scripts/utils.mjs +107 -0
  30. jostack_mdparse-0.1.0/src/jostack_mdparse/__init__.py +7 -0
  31. jostack_mdparse-0.1.0/src/jostack_mdparse/cli.py +137 -0
  32. jostack_mdparse-0.1.0/src/jostack_mdparse/extract.py +337 -0
  33. jostack_mdparse-0.1.0/tests/conftest.py +21 -0
  34. jostack_mdparse-0.1.0/tests/fixtures/sample.md +43 -0
  35. jostack_mdparse-0.1.0/tests/test_extract.py +181 -0
@@ -0,0 +1,18 @@
1
+ root = true
2
+
3
+ [*]
4
+ end_of_line = lf
5
+ insert_final_newline = true
6
+ trim_trailing_whitespace = true
7
+ charset = utf-8
8
+
9
+ [*.py]
10
+ indent_style = space
11
+ indent_size = 4
12
+
13
+ [*.{yml,yaml,json,toml,md}]
14
+ indent_style = space
15
+ indent_size = 2
16
+
17
+ [Makefile]
18
+ indent_style = tab
@@ -0,0 +1,5 @@
1
+ * text=auto eol=lf
2
+ *.py text eol=lf
3
+ *.md text eol=lf
4
+ *.yml text eol=lf
5
+ *.json text eol=lf
@@ -0,0 +1,39 @@
1
+ name: Bug Report
2
+ description: Report a bug in jostack-mdparse
3
+ labels: [bug]
4
+ body:
5
+ - type: textarea
6
+ id: description
7
+ attributes:
8
+ label: Description
9
+ description: A clear description of the bug.
10
+ validations:
11
+ required: true
12
+ - type: textarea
13
+ id: steps
14
+ attributes:
15
+ label: Steps to Reproduce
16
+ description: Steps to reproduce the behavior.
17
+ validations:
18
+ required: true
19
+ - type: textarea
20
+ id: expected
21
+ attributes:
22
+ label: Expected Behavior
23
+ description: What you expected to happen.
24
+ validations:
25
+ required: true
26
+ - type: input
27
+ id: version
28
+ attributes:
29
+ label: Version
30
+ description: jostack-mdparse version
31
+ validations:
32
+ required: true
33
+ - type: input
34
+ id: python
35
+ attributes:
36
+ label: Python Version
37
+ description: Python version (e.g. 3.12)
38
+ validations:
39
+ required: true
@@ -0,0 +1,5 @@
1
+ blank_issues_enabled: false
2
+ contact_links:
3
+ - name: Questions & Discussions
4
+ url: https://github.com/hyunhee-jo/jostack-mdparse/discussions
5
+ about: Ask questions and discuss ideas
@@ -0,0 +1,23 @@
1
+ name: Feature Request
2
+ description: Suggest a new feature
3
+ labels: [enhancement]
4
+ body:
5
+ - type: textarea
6
+ id: description
7
+ attributes:
8
+ label: Description
9
+ description: A clear description of the feature you'd like.
10
+ validations:
11
+ required: true
12
+ - type: textarea
13
+ id: motivation
14
+ attributes:
15
+ label: Motivation
16
+ description: Why is this feature important?
17
+ validations:
18
+ required: true
19
+ - type: textarea
20
+ id: alternatives
21
+ attributes:
22
+ label: Alternatives Considered
23
+ description: Any alternative solutions you've considered.
@@ -0,0 +1,17 @@
1
+ ## Summary
2
+
3
+ <!-- Brief description of the changes -->
4
+
5
+ ## Changes
6
+
7
+ <!-- List of changes made -->
8
+
9
+ -
10
+
11
+ ## Test Plan
12
+
13
+ <!-- How were these changes tested? -->
14
+
15
+ - [ ] Unit tests pass (`pytest tests/ -v`)
16
+ - [ ] Linting passes (`ruff check src/ tests/`)
17
+ - [ ] Type checking passes (`mypy src/`)
@@ -0,0 +1,17 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: "pip"
4
+ directory: "/"
5
+ schedule:
6
+ interval: "weekly"
7
+ groups:
8
+ uv:
9
+ patterns: ["*"]
10
+
11
+ - package-ecosystem: "github-actions"
12
+ directory: "/"
13
+ schedule:
14
+ interval: "weekly"
15
+ groups:
16
+ actions:
17
+ patterns: ["*"]
@@ -0,0 +1,14 @@
1
+ changelog:
2
+ categories:
3
+ - title: "🚀 Features"
4
+ labels: [enhancement]
5
+ - title: "🐛 Bug Fixes"
6
+ labels: [bug]
7
+ - title: "📖 Documentation"
8
+ labels: [documentation]
9
+ - title: "🔧 Maintenance"
10
+ labels: [chore, dependencies]
11
+ - title: "🔒 Security"
12
+ labels: [security]
13
+ exclude:
14
+ labels: [skip-changelog]
@@ -0,0 +1,52 @@
1
+ name: CI
2
+
3
+ on:
4
+ pull_request:
5
+ branches: [main]
6
+ workflow_dispatch:
7
+
8
+ concurrency:
9
+ group: ci-${{ github.event.pull_request.number || github.ref }}
10
+ cancel-in-progress: true
11
+
12
+ permissions: {}
13
+
14
+ jobs:
15
+ unit-test:
16
+ runs-on: ubuntu-latest
17
+ strategy:
18
+ matrix:
19
+ python-version: ['3.10', '3.12', '3.13']
20
+ timeout-minutes: 10
21
+ steps:
22
+ - uses: actions/checkout@v4
23
+ - uses: actions/setup-python@v5
24
+ with:
25
+ python-version: ${{ matrix.python-version }}
26
+ - name: Install dependencies
27
+ run: pip install -e . pytest pytest-socket pytest-cov
28
+ - name: Run tests
29
+ run: pytest tests/ -v --disable-socket --allow-unix-socket --cov=jostack_mdparse --cov-report=xml
30
+ - name: Upload coverage
31
+ if: matrix.python-version == '3.13'
32
+ uses: codecov/codecov-action@v5
33
+ with:
34
+ files: coverage.xml
35
+ fail_ci_if_error: false
36
+
37
+ lint:
38
+ runs-on: ubuntu-latest
39
+ timeout-minutes: 5
40
+ steps:
41
+ - uses: actions/checkout@v4
42
+ - uses: actions/setup-python@v5
43
+ with:
44
+ python-version: '3.13'
45
+ - name: Install ruff and mypy
46
+ run: pip install ruff mypy
47
+ - name: Ruff check
48
+ run: ruff check src/ tests/
49
+ - name: Ruff format check
50
+ run: ruff format --check src/ tests/
51
+ - name: Mypy
52
+ run: mypy src/
@@ -0,0 +1,29 @@
1
+ name: Claude Code Review
2
+
3
+ on:
4
+ pull_request:
5
+ types: [opened, synchronize]
6
+
7
+ permissions:
8
+ contents: read
9
+ pull-requests: write
10
+
11
+ jobs:
12
+ review:
13
+ runs-on: ubuntu-latest
14
+ timeout-minutes: 10
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+ - uses: anthropics/claude-code-action@v1
18
+ with:
19
+ anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
20
+ review_comment: |
21
+ Review this PR for:
22
+ - Code quality and bugs
23
+ - Performance issues
24
+ - Security vulnerabilities
25
+ - Test coverage gaps
26
+ allowed_tools: |
27
+ gh pr comment
28
+ gh pr view
29
+ gh pr diff
@@ -0,0 +1,23 @@
1
+ name: CodeQL
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+ schedule:
9
+ - cron: '0 6 * * 1'
10
+
11
+ permissions:
12
+ security-events: write
13
+
14
+ jobs:
15
+ analyze:
16
+ runs-on: ubuntu-latest
17
+ timeout-minutes: 15
18
+ steps:
19
+ - uses: actions/checkout@v4
20
+ - uses: github/codeql-action/init@v3
21
+ with:
22
+ languages: python
23
+ - uses: github/codeql-action/analyze@v3
@@ -0,0 +1,24 @@
1
+ name: Notify Downstream
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ permissions: {}
8
+
9
+ jobs:
10
+ notify:
11
+ runs-on: ubuntu-latest
12
+ timeout-minutes: 5
13
+ steps:
14
+ - name: Extract version
15
+ id: version
16
+ run: echo "version=${GITHUB_REF_NAME#v}" >> "$GITHUB_OUTPUT"
17
+
18
+ - name: Trigger langchain sync
19
+ uses: peter-evans/repository-dispatch@v3
20
+ with:
21
+ token: ${{ secrets.LANGCHAIN_SYNC_TOKEN }}
22
+ repository: hyunhee-jo/langchain-md-extract
23
+ event-type: upstream-release
24
+ client-payload: '{"version": "${{ steps.version.outputs.version }}"}'
@@ -0,0 +1,58 @@
1
+ name: Release
2
+
3
+ on:
4
+ push:
5
+ tags: ['v*']
6
+
7
+ permissions:
8
+ contents: write
9
+ id-token: write
10
+
11
+ jobs:
12
+ release:
13
+ runs-on: ubuntu-latest
14
+ timeout-minutes: 15
15
+ environment:
16
+ name: pypi
17
+ url: https://pypi.org/p/jostack-mdparse
18
+ steps:
19
+ - uses: actions/checkout@v4
20
+
21
+ - uses: actions/setup-python@v5
22
+ with:
23
+ python-version: '3.13'
24
+
25
+ - name: Install uv
26
+ uses: astral-sh/setup-uv@v4
27
+
28
+ - name: Extract version
29
+ id: version
30
+ run: |
31
+ VERSION="${GITHUB_REF_NAME#v}"
32
+ echo "value=$VERSION" >> "$GITHUB_OUTPUT"
33
+
34
+ - name: Inject version
35
+ env:
36
+ VERSION: ${{ steps.version.outputs.value }}
37
+ run: |
38
+ sed -i "s/^version = \".*\"/version = \"${VERSION}\"/" pyproject.toml
39
+
40
+ - name: Build
41
+ run: uv build
42
+
43
+ - name: Verify build
44
+ run: |
45
+ pip install twine
46
+ twine check dist/*
47
+
48
+ - name: Publish to PyPI
49
+ uses: pypa/gh-action-pypi-publish@release/v1
50
+
51
+ - name: Create GitHub Release
52
+ env:
53
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
54
+ TAG: ${{ github.ref_name }}
55
+ run: |
56
+ gh release create "$TAG" dist/* \
57
+ --title "$TAG" \
58
+ --generate-notes
@@ -0,0 +1,207 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+ #poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ #pdm.lock
116
+ #pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ #pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # SageMath parsed files
135
+ *.sage.py
136
+
137
+ # Environments
138
+ .env
139
+ .envrc
140
+ .venv
141
+ env/
142
+ venv/
143
+ ENV/
144
+ env.bak/
145
+ venv.bak/
146
+
147
+ # Spyder project settings
148
+ .spyderproject
149
+ .spyproject
150
+
151
+ # Rope project settings
152
+ .ropeproject
153
+
154
+ # mkdocs documentation
155
+ /site
156
+
157
+ # mypy
158
+ .mypy_cache/
159
+ .dmypy.json
160
+ dmypy.json
161
+
162
+ # Pyre type checker
163
+ .pyre/
164
+
165
+ # pytype static type analyzer
166
+ .pytype/
167
+
168
+ # Cython debug symbols
169
+ cython_debug/
170
+
171
+ # PyCharm
172
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
173
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
175
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
176
+ #.idea/
177
+
178
+ # Abstra
179
+ # Abstra is an AI-powered process automation framework.
180
+ # Ignore directories containing user credentials, local state, and settings.
181
+ # Learn more at https://abstra.io/docs
182
+ .abstra/
183
+
184
+ # Visual Studio Code
185
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
186
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
187
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
188
+ # you could uncomment the following to ignore the entire vscode folder
189
+ # .vscode/
190
+
191
+ # Ruff stuff:
192
+ .ruff_cache/
193
+
194
+ # PyPI configuration file
195
+ .pypirc
196
+
197
+ # Cursor
198
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
199
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
200
+ # refer to https://docs.cursor.com/context/ignore-files
201
+ .cursorignore
202
+ .cursorindexingignore
203
+
204
+ # Marimo
205
+ marimo/_static/
206
+ marimo/_lsp/
207
+ __marimo__/
@@ -0,0 +1,18 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/).
7
+
8
+ ## [Unreleased]
9
+
10
+ ### Added
11
+
12
+ - Initial project structure with CLI and Python API
13
+ - `extract` command: parse Markdown to JSON, text, or HTML
14
+ - `toc` command: print heading tree
15
+ - `meta` command: print frontmatter metadata
16
+ - 12 extraction options via `options.json` (Single Source of Truth)
17
+ - CI workflow with Python 3.10/3.12/3.13 matrix
18
+ - Claude Code automated PR review
@@ -0,0 +1,32 @@
1
+ # CLAUDE.md
2
+
3
+ ## Project Overview
4
+
5
+ jostack-mdparse is a Markdown file parser and structured extraction tool with CLI and Python API.
6
+
7
+ ## Architecture
8
+
9
+ - `src/jostack_mdparse/extract.py` — Core extraction logic
10
+ - `src/jostack_mdparse/cli.py` — CLI entry point (argparse, 3 subcommands: extract/toc/meta)
11
+ - `options.json` — Single Source of Truth for all CLI options (12 options)
12
+ - `scripts/generate-options.mjs` — Code generation from options.json
13
+
14
+ ## Key Design Decisions
15
+
16
+ - **options.json is the Single Source of Truth (SSoT)**: All CLI options are defined here, not in code
17
+ - **SYNCED markers**: Used in langchain-jostack-mdparse for auto-sync
18
+ - **Version 0.0.0**: Version is injected at build time via sed in release.yml
19
+
20
+ ## Commands
21
+
22
+ ```bash
23
+ make test # Run tests (pytest + pytest-socket)
24
+ make lint # Run ruff + mypy
25
+ make format # Auto-format with ruff
26
+ ```
27
+
28
+ ## Conventions
29
+
30
+ - Commits: conventional commit format (English)
31
+ - PRs: single purpose, <10 files, <500 lines
32
+ - Python: ruff lint + ruff format + mypy strict
@@ -0,0 +1,18 @@
1
+ # Code of Conduct
2
+
3
+ ## Our Pledge
4
+
5
+ We are committed to providing a friendly, safe and welcoming environment for all.
6
+
7
+ ## Our Standards
8
+
9
+ Examples of behavior that contributes to a positive environment:
10
+
11
+ - Using welcoming and inclusive language
12
+ - Being respectful of differing viewpoints and experiences
13
+ - Gracefully accepting constructive criticism
14
+ - Focusing on what is best for the community
15
+
16
+ ## Enforcement
17
+
18
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by opening an issue or contacting the maintainer.
@@ -0,0 +1,33 @@
1
+ # Contributing
2
+
3
+ Thank you for your interest in contributing to jostack-mdparse!
4
+
5
+ ## Development Setup
6
+
7
+ ```bash
8
+ git clone https://github.com/hyunhee-jo/jostack-mdparse.git
9
+ cd jostack-mdparse
10
+ pip install -e .
11
+ pip install pytest pytest-socket ruff mypy
12
+ ```
13
+
14
+ ## Running Tests
15
+
16
+ ```bash
17
+ make test
18
+ ```
19
+
20
+ ## Code Style
21
+
22
+ This project uses [ruff](https://docs.astral.sh/ruff/) for linting and formatting, and [mypy](https://mypy-lang.org/) for type checking.
23
+
24
+ ```bash
25
+ make lint # Check
26
+ make format # Auto-fix
27
+ ```
28
+
29
+ ## Pull Requests
30
+
31
+ - One PR per feature/fix (keep PRs small: <10 files, <500 lines)
32
+ - All tests must pass
33
+ - Follow conventional commit messages