py-okf 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,143 @@
1
+ Metadata-Version: 2.4
2
+ Name: py-okf
3
+ Version: 0.1.0
4
+ Summary: Python library for Open Knowledge Format (OKF) file generation and refresh
5
+ Author-email: Prabhay Gupta <coolprabhay90@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/prabhay759/py-okf
8
+ Project-URL: Repository, https://github.com/prabhay759/py-okf
9
+ Project-URL: Issues, https://github.com/prabhay759/py-okf/issues
10
+ Keywords: okf,documentation,ast,code-analysis,open-knowledge-format
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Software Development :: Documentation
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Requires-Python: >=3.10
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: PyYAML>=6.0
25
+ Provides-Extra: dev
26
+ Requires-Dist: pytest>=7.0; extra == "dev"
27
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
28
+ Requires-Dist: build>=1.0; extra == "dev"
29
+ Requires-Dist: twine>=5.0; extra == "dev"
30
+ Provides-Extra: watch
31
+ Requires-Dist: watchdog>=3.0; extra == "watch"
32
+ Dynamic: license-file
33
+
34
+ # py-okf
35
+
36
+ A Python library for generating and managing [Open Knowledge Format (OKF)](https://cloud.google.com/blog/products/data-analytics/how-the-open-knowledge-format-can-improve-data-sharing) files from Python projects.
37
+
38
+ OKF is a vendor-neutral markdown specification that lets AI agents and humans access curated knowledge without vendor lock-in — a directory of `.md` files with YAML frontmatter, one file per concept (module, class, API, dataset, etc.).
39
+
40
+ `py-okf` analyzes your Python codebase using the `ast` module and generates an `.okf/` bundle of markdown files describing your project's concepts.
41
+
42
+ ## Installation
43
+
44
+ ```bash
45
+ pip install py-okf
46
+ ```
47
+
48
+ ## CLI Usage
49
+
50
+ ```bash
51
+ # Generate OKF files for a Python project
52
+ okf generate /path/to/your/project
53
+
54
+ # Refresh stale OKF files (regenerates only the files whose source has changed)
55
+ okf refresh /path/to/your/project
56
+
57
+ # Validate OKF files against the spec
58
+ okf validate /path/to/your/project
59
+
60
+ # Custom output directory
61
+ okf generate /path/to/your/project -o docs/okf
62
+
63
+ # Include private (underscore-prefixed) symbols
64
+ okf generate /path/to/your/project --include-private
65
+ ```
66
+
67
+ ## Python API
68
+
69
+ ```python
70
+ from pyokf import analyze_project, generate_module_concepts, OKFBundle
71
+ from pathlib import Path
72
+
73
+ # Analyze a project
74
+ modules = analyze_project(Path("./myproject"))
75
+
76
+ # Generate OKF concepts
77
+ bundle = OKFBundle(Path("./.okf"))
78
+ bundle.ensure_directory()
79
+ for module in modules:
80
+ concepts = generate_module_concepts(module)
81
+ bundle.write_all(concepts)
82
+
83
+ # Load and validate an existing bundle
84
+ bundle = OKFBundle(Path("./.okf")).load()
85
+ errors = bundle.validate_directory()
86
+ ```
87
+
88
+ ## Generated Output
89
+
90
+ Running `okf generate .` produces a flat `.okf/` directory in which each file is named after the dotted path of the concept it describes:
91
+
92
+ ```
93
+ .okf/
94
+ ├── mypackage.md # module concept
95
+ ├── mypackage.Connection.md # class concept
96
+ └── mypackage.query.md # api concept (exported function)
97
+ ```
98
+
99
+ Each file contains YAML frontmatter followed by a markdown description:
100
+
101
+ ```markdown
102
+ ---
103
+ type: api
104
+ title: mypackage.query
105
+ description: Execute a SQL query against the active connection.
106
+ resource: ./mypackage/__init__.py
107
+ tags:
108
+ - python
109
+ - api
110
+ timestamp: '2026-06-22T10:00:00Z'
111
+ ---
112
+
113
+ # query
114
+
115
+ Execute a SQL query against the active connection.
116
+
117
+ ## Signature
118
+
119
+ def query(sql: str, params: Optional[list[str]] = None) -> list[dict]
120
+
121
+ ## Parameters
122
+
123
+ - **`sql`**: `str`
124
+ - **`params`**: `Optional[list[str]]` *(default: `None`)*
125
+
126
+ ## Returns
127
+
128
+ `list[dict]`
129
+ ```
130
+
131
+ ## Concept Types
132
+
133
+ | Type | Description |
134
+ |------|-------------|
135
+ | `module` | A Python module file |
136
+ | `class` | A class definition |
137
+ | `function` | A top-level function |
138
+ | `api` | A function listed in `__all__` (publicly exported) |
139
+
140
+ ## Requirements
141
+
142
+ - Python 3.10+
143
+ - PyYAML >= 6.0
@@ -0,0 +1,12 @@
1
+ py_okf-0.1.0.dist-info/licenses/LICENSE,sha256=3fs40A9lisNCJwF8fzRl0_HdX_n2gnpxaP_oYczkh3k,1070
2
+ pyokf/__init__.py,sha256=3SMMhG96jtNIXne0KLoxStVPHhVMCMGaExgVsKSq_s8,711
3
+ pyokf/analyzer.py,sha256=7vQcD2cc_H9H9SwQ7ak1vls0_Ewbm4vgX08ELlSuwDY,7357
4
+ pyokf/bundle.py,sha256=ntLbIGKSzOlO8t6HHf38VGFR-LXDXcO6gCtoX-JXAi0,6148
5
+ pyokf/cli.py,sha256=Eli82nPVRGOPD4r-u-7-8wHDSmGBFWmdOu6WmBGFHFk,4704
6
+ pyokf/generator.py,sha256=3bQcfoZDNjdfgcFvBYZR1aYfPZ7HWwtbpM0C1mwSz30,7125
7
+ pyokf/models.py,sha256=Tjs3WvX51UgkMkbq9sB8xzYiN3d8kBs91UJc7yqnSAs,3557
8
+ py_okf-0.1.0.dist-info/METADATA,sha256=Bag1ubPuNmDH781vK25KzTIDZCHemIAichIPco-gUgE,4152
9
+ py_okf-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
10
+ py_okf-0.1.0.dist-info/entry_points.txt,sha256=t8mt3EctJn9QyNy9TvrVnAvHM-R2HoRjOrN77yiiQMI,39
11
+ py_okf-0.1.0.dist-info/top_level.txt,sha256=XavKik6EGlWF-hSXxAQ0j5LS4kpi_DG91n4BIOrNeYU,6
12
+ py_okf-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ okf = pyokf.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Prabhay Gupta
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ pyokf
pyokf/__init__.py ADDED
@@ -0,0 +1,33 @@
1
+ """py-okf: Python library for Open Knowledge Format (OKF) file generation."""
2
+
3
+ __version__ = "0.1.0"
4
+
5
+ from pyokf.models import (
6
+ ConceptType,
7
+ OKFConcept,
8
+ ArgumentInfo,
9
+ AttributeInfo,
10
+ MethodInfo,
11
+ ClassInfo,
12
+ FunctionInfo,
13
+ ModuleInfo,
14
+ )
15
+ from pyokf.analyzer import analyze_file, analyze_project
16
+ from pyokf.generator import generate_concept, generate_module_concepts
17
+ from pyokf.bundle import OKFBundle
18
+
19
+ __all__ = [
20
+ "ConceptType",
21
+ "OKFConcept",
22
+ "ArgumentInfo",
23
+ "AttributeInfo",
24
+ "MethodInfo",
25
+ "ClassInfo",
26
+ "FunctionInfo",
27
+ "ModuleInfo",
28
+ "analyze_file",
29
+ "analyze_project",
30
+ "generate_concept",
31
+ "generate_module_concepts",
32
+ "OKFBundle",
33
+ ]
pyokf/analyzer.py ADDED
@@ -0,0 +1,213 @@
1
+ """Python AST analyzer for py-okf."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import ast
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
+ from pyokf.models import (
10
+ ArgumentInfo,
11
+ AttributeInfo,
12
+ ClassInfo,
13
+ FunctionInfo,
14
+ MethodInfo,
15
+ ModuleInfo,
16
+ )
17
+
18
# Directory names whose contents are ignored by the project scan: VCS
# metadata, tool caches, build artefacts, virtual environments, and the
# default OKF output directory.
_EXCLUDE_DIRS = frozenset(
    (
        ".git",
        "__pycache__",
        ".tox",
        "build",
        "dist",
        ".eggs",
        "venv",
        ".venv",
        "env",
        ".env",
        "node_modules",
        ".mypy_cache",
        ".pytest_cache",
        ".ruff_cache",
        ".okf",
    )
)
23
+
24
+
25
def _extract_args(args: ast.arguments) -> list[ArgumentInfo]:
    """Flatten an ``ast.arguments`` node into a list of ``ArgumentInfo``.

    Parameters are emitted in declaration order: positional-only,
    positional, ``*args``, keyword-only, ``**kwargs``.  Annotations and
    defaults are stored as unparsed source text (``None`` when absent).
    """

    def _source_of(node):
        # Annotation nodes are optional; absent ones are reported as None.
        return ast.unparse(node) if node else None

    positional_params = [*args.posonlyargs, *args.args]
    posonly_count = len(args.posonlyargs)
    # ``defaults`` is right-aligned against the positional parameters, so the
    # first parameter that owns a default sits at this index.
    first_defaulted = len(positional_params) - len(args.defaults)

    collected: list[ArgumentInfo] = []
    for position, param in enumerate(positional_params):
        if position >= first_defaulted:
            default_src = ast.unparse(args.defaults[position - first_defaulted])
        else:
            default_src = None
        collected.append(
            ArgumentInfo(
                name=param.arg,
                annotation=_source_of(param.annotation),
                default=default_src,
                kind="positional_only" if position < posonly_count else "positional",
            )
        )

    if args.vararg:
        collected.append(
            ArgumentInfo(
                name=args.vararg.arg,
                annotation=_source_of(args.vararg.annotation),
                kind="var_positional",
            )
        )

    for position, param in enumerate(args.kwonlyargs):
        # ``kw_defaults`` runs parallel to ``kwonlyargs``; None marks "no default".
        raw_default = args.kw_defaults[position]
        collected.append(
            ArgumentInfo(
                name=param.arg,
                annotation=_source_of(param.annotation),
                default=None if raw_default is None else ast.unparse(raw_default),
                kind="keyword_only",
            )
        )

    if args.kwarg:
        collected.append(
            ArgumentInfo(
                name=args.kwarg.arg,
                annotation=_source_of(args.kwarg.annotation),
                kind="var_keyword",
            )
        )

    return collected
55
+
56
+
57
def _build_signature(func_node: ast.FunctionDef | ast.AsyncFunctionDef) -> str:
    """Return the ``def``/``async def`` header line of a function, without the colon.

    ``ast.unparse`` writes decorators *before* the ``def`` line, so taking the
    first line of the unparsed function returned ``@decorator`` for every
    decorated function.  The header is therefore rendered from a shallow copy
    that has no decorators and a one-statement body; the caller's node is
    left untouched.

    Args:
        func_node: The function definition node to describe.

    Returns:
        The first line of the rendered header with trailing ``:`` removed,
        e.g. ``"def f(x: int) -> str"`` or ``"async def g()"``.
    """
    import copy  # local import: the module header does not import ``copy``

    header_node = copy.copy(func_node)
    header_node.decorator_list = []
    # The body is irrelevant for the header; a stub avoids unparsing it.
    header_node.body = [ast.Pass()]

    first_line = ast.unparse(header_node).split("\n", 1)[0]
    return first_line.rstrip(":")
61
+
62
+
63
def _extract_method(node: ast.FunctionDef | ast.AsyncFunctionDef) -> MethodInfo:
    """Build a ``MethodInfo`` record from a sync or async function node."""
    decorator_sources = [ast.unparse(deco) for deco in node.decorator_list]
    return_source = ast.unparse(node.returns) if node.returns else None
    return MethodInfo(
        name=node.name,
        signature=_build_signature(node),
        docstring=ast.get_docstring(node),
        decorators=decorator_sources,
        is_async=isinstance(node, ast.AsyncFunctionDef),
        args=_extract_args(node.args),
        returns=return_source,
        line_number=node.lineno,
    )
74
+
75
+
76
def _extract_class(node: ast.ClassDef, module_name: str, source_file: str) -> ClassInfo:
    """Build a ``ClassInfo`` record from a class definition node.

    Only statements directly in the class body are inspected: annotated
    assignments to a plain name become attributes, and sync/async function
    definitions become methods.  Un-annotated assignments are not recorded.
    """
    annotated_fields = [
        AttributeInfo(
            name=stmt.target.id,
            annotation=ast.unparse(stmt.annotation) if stmt.annotation else None,
            default=ast.unparse(stmt.value) if stmt.value else None,
        )
        for stmt in node.body
        if isinstance(stmt, ast.AnnAssign) and isinstance(stmt.target, ast.Name)
    ]
    method_records = [
        _extract_method(stmt)
        for stmt in node.body
        if isinstance(stmt, (ast.FunctionDef, ast.AsyncFunctionDef))
    ]
    base_sources = [ast.unparse(base) for base in node.bases]
    decorator_sources = [ast.unparse(deco) for deco in node.decorator_list]

    return ClassInfo(
        name=node.name,
        module_name=module_name,
        source_file=source_file,
        docstring=ast.get_docstring(node),
        bases=base_sources,
        attributes=annotated_fields,
        methods=method_records,
        decorators=decorator_sources,
        line_number=node.lineno,
    )
101
+
102
+
103
def _extract_function(
    node: ast.FunctionDef | ast.AsyncFunctionDef,
    module_name: str,
    source_file: str,
    exports: list[str],
) -> FunctionInfo:
    """Build a ``FunctionInfo`` record from a top-level function node.

    ``exports`` is the module's ``__all__`` list; the function is flagged as
    exported when its name appears in it.
    """
    decorator_sources = [ast.unparse(deco) for deco in node.decorator_list]
    return_source = ast.unparse(node.returns) if node.returns else None
    listed_in_all = node.name in exports
    return FunctionInfo(
        name=node.name,
        module_name=module_name,
        source_file=source_file,
        signature=_build_signature(node),
        docstring=ast.get_docstring(node),
        decorators=decorator_sources,
        is_async=isinstance(node, ast.AsyncFunctionDef),
        args=_extract_args(node.args),
        returns=return_source,
        is_exported=listed_in_all,
        line_number=node.lineno,
    )
122
+
123
+
124
def _extract_exports(tree: ast.Module) -> list[str]:
    """Return the string entries of the module's literal ``__all__``.

    Recognises a top-level ``__all__ = [...]`` / ``__all__ = (...)`` and the
    annotated form ``__all__: list[str] = [...]``.  The first such statement
    whose value is a list or tuple literal wins; non-string elements are
    ignored.  Dynamically built ``__all__`` values are not evaluated.

    Args:
        tree: Parsed module.

    Returns:
        The exported names in declaration order, or ``[]`` when no literal
        ``__all__`` is found.
    """
    for node in tree.body:
        if isinstance(node, ast.Assign):
            if len(node.targets) != 1:
                continue
            target, value = node.targets[0], node.value
        elif isinstance(node, ast.AnnAssign):
            target, value = node.target, node.value
        else:
            continue

        if not (isinstance(target, ast.Name) and target.id == "__all__"):
            continue
        if not isinstance(value, (ast.List, ast.Tuple)):
            continue

        # ``Constant.value`` is the supported accessor; the ``.s`` alias is
        # deprecated and emits a DeprecationWarning on recent interpreters.
        return [
            elt.value
            for elt in value.elts
            if isinstance(elt, ast.Constant) and isinstance(elt.value, str)
        ]
    return []
138
+
139
+
140
def _path_to_module_name(py_file: Path, package_root: Path) -> str:
    """Convert a file path under ``package_root`` into a dotted module name.

    A trailing ``__init__`` component is dropped so a package is named after
    its directory; an ``__init__.py`` directly in ``package_root`` takes the
    root directory's own name.
    """
    relative_stem = py_file.relative_to(package_root).with_suffix("")
    segments = [*relative_stem.parts]
    if segments[-1:] == ["__init__"]:
        segments.pop()
    if not segments:
        return package_root.name
    return ".".join(segments)
146
+
147
+
148
def analyze_file(
    py_file: Path,
    module_name: Optional[str] = None,
    package_root: Optional[Path] = None,
) -> ModuleInfo:
    """Parse one Python source file and describe its top-level contents.

    Args:
        py_file: File to analyze; resolved to an absolute path.
        module_name: Dotted module name to record.  Derived from the file's
            location under ``package_root`` when omitted.
        package_root: Directory that module names and ``source_file`` are
            computed relative to.  Defaults to the file's own directory.

    Returns:
        A ``ModuleInfo`` holding the module docstring, its ``__all__``
        entries, and records for every top-level class and function.
    """
    py_file = Path(py_file).resolve()
    package_root = py_file.parent if package_root is None else Path(package_root).resolve()

    tree = ast.parse(py_file.read_text(encoding="utf-8"), filename=str(py_file))

    if module_name is None:
        module_name = _path_to_module_name(py_file, package_root)

    source_file = str(py_file.relative_to(package_root))
    exports = _extract_exports(tree)

    # Only direct children of the module are reported; nested definitions
    # are not walked.
    class_records: list[ClassInfo] = [
        _extract_class(stmt, module_name, source_file)
        for stmt in tree.body
        if isinstance(stmt, ast.ClassDef)
    ]
    function_records: list[FunctionInfo] = [
        _extract_function(stmt, module_name, source_file, exports)
        for stmt in tree.body
        if isinstance(stmt, (ast.FunctionDef, ast.AsyncFunctionDef))
    ]

    return ModuleInfo(
        name=module_name,
        source_file=source_file,
        docstring=ast.get_docstring(tree),
        classes=class_records,
        functions=function_records,
        exports=exports,
    )
185
+
186
+
187
def analyze_project(
    project_root: Path,
    include_private: bool = False,
) -> list[ModuleInfo]:
    """Walk a Python project and analyze every eligible ``.py`` file.

    Args:
        project_root: Directory to scan recursively.
        include_private: When false, files whose name starts with ``_`` are
            skipped, except ``__init__.py``.

    Returns:
        One ``ModuleInfo`` per analyzed file, ordered by path.  Files that
        cannot be decoded as UTF-8 or parsed are skipped so one bad file
        does not abort the scan.
    """
    project_root = Path(project_root).resolve()

    src_dir = project_root / "src"
    has_src_layout = src_dir.is_dir()

    modules: list[ModuleInfo] = []
    for py_file in sorted(project_root.rglob("*.py")):
        # Match excluded names only against directories *below* the project
        # root.  Testing the absolute path dropped every file whenever the
        # project itself lived under a directory called e.g. "build" or "env".
        relative_dirs = py_file.relative_to(project_root).parts[:-1]
        if any(part in _EXCLUDE_DIRS for part in relative_dirs):
            continue
        if not include_private and py_file.name.startswith("_") and py_file.name != "__init__.py":
            continue
        # Use src/ as root for files inside it; project root for everything else
        if has_src_layout and py_file.is_relative_to(src_dir):
            package_root = src_dir
        else:
            package_root = project_root
        try:
            modules.append(analyze_file(py_file, package_root=package_root))
        except (SyntaxError, UnicodeDecodeError):
            # Best-effort scan: skip unparseable or undecodable files.
            continue

    return modules
pyokf/bundle.py ADDED
@@ -0,0 +1,165 @@
1
+ """OKF bundle reader, writer, and validator."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import datetime
6
+ from pathlib import Path
7
+
8
+ import yaml
9
+
10
+ from pyokf.models import ConceptType, OKFConcept
11
+
12
# Frontmatter keys that validation reports as missing when absent.
REQUIRED_FRONTMATTER = frozenset(
    ("type", "title", "description", "resource", "timestamp")
)

# Accepted ``type`` values: every ConceptType value plus additional type
# names that validation accepts but this package does not generate.
VALID_TYPES = frozenset(member.value for member in ConceptType) | frozenset(
    ("dataset", "metric", "playbook", "runbook", "table")
)
16
+
17
+
18
class ValidationError(Exception):
    """Signals that an OKF file did not pass validation."""
20
+
21
+
22
class OKFBundle:
    """Represents an OKF bundle directory (.okf/).

    Concepts that have been loaded or written are cached in memory, keyed by
    their path relative to the bundle directory.
    """

    def __init__(self, directory: Path) -> None:
        self.directory = Path(directory)
        self._concepts: dict[str, OKFConcept] = {}

    @staticmethod
    def _split_frontmatter(content: str) -> tuple[str, str] | None:
        """Split file text into ``(yaml_text, body_text)``.

        The closing delimiter is the first later *line* consisting solely of
        ``---``.  Splitting on the bare substring cut the YAML short whenever
        a value contained ``---`` (for instance a docstring with a
        ``----------`` underline).  Text whose delimiters are not on lines of
        their own falls back to the substring split.

        Returns ``None`` when no closing delimiter exists.  The caller is
        expected to have checked that ``content`` starts with ``---``.
        """
        lines = content.split("\n")
        if lines[0].rstrip() == "---":
            for index, line in enumerate(lines[1:], start=1):
                # Column-0 match only: indented scalar content is not a delimiter.
                if line.rstrip() == "---":
                    return "\n".join(lines[1:index]), "\n".join(lines[index + 1:])

        parts = content.split("---", 2)
        if len(parts) < 3:
            return None
        return parts[1], parts[2]

    def load(self) -> "OKFBundle":
        """Load all .md files from the bundle directory.

        Files that cannot be parsed are skipped; they are absent from the
        cache and so count as needing a refresh.  Returns ``self``.
        """
        if not self.directory.exists():
            return self
        for md_file in sorted(self.directory.glob("**/*.md")):
            try:
                concept = self._parse_file(md_file)
            except (ValueError, yaml.YAMLError):
                # Deliberately best-effort: one broken file must not block the rest.
                continue
            rel = str(md_file.relative_to(self.directory))
            self._concepts[rel] = concept
        return self

    def _parse_file(self, md_file: Path) -> OKFConcept:
        """Parse one OKF markdown file into an ``OKFConcept``.

        Raises:
            ValueError: If the frontmatter is missing, unterminated, not a
                mapping, or carries an unparseable timestamp string.
            yaml.YAMLError: If the frontmatter is not valid YAML.
        """
        content = md_file.read_text(encoding="utf-8")
        if not content.startswith("---"):
            raise ValueError(f"Missing frontmatter in {md_file}")
        split = self._split_frontmatter(content)
        if split is None:
            raise ValueError(f"Malformed frontmatter in {md_file}")
        fm_text, body_text = split
        fm = yaml.safe_load(fm_text)
        if not isinstance(fm, dict):
            raise ValueError(f"Frontmatter is not a mapping in {md_file}")
        body = body_text.strip()

        raw_ts = fm.get("timestamp")
        if isinstance(raw_ts, datetime.datetime):
            # An unquoted YAML timestamp is already parsed; naive means UTC.
            ts = raw_ts
            if ts.tzinfo is None:
                ts = ts.replace(tzinfo=datetime.timezone.utc)
        elif isinstance(raw_ts, str):
            # Map the "Z" suffix to an explicit offset before parsing.
            ts = datetime.datetime.fromisoformat(raw_ts.replace("Z", "+00:00"))
        else:
            ts = datetime.datetime.now(tz=datetime.timezone.utc)

        raw_type = fm.get("type", "function")
        try:
            concept_type = ConceptType(raw_type)
        except ValueError:
            # Types outside ConceptType are loaded as FUNCTION.
            concept_type = ConceptType.FUNCTION

        known = {"type", "title", "description", "resource", "tags", "timestamp"}
        extra = {k: v for k, v in fm.items() if k not in known}

        return OKFConcept(
            type=concept_type,
            title=fm.get("title", ""),
            description=fm.get("description", ""),
            resource=fm.get("resource", ""),
            output_path=str(md_file.relative_to(self.directory)),
            # A bare ``tags:`` key parses to None; normalise it to a list.
            tags=fm.get("tags") or [],
            timestamp=ts,
            content=body,
            extra_frontmatter=extra,
        )

    def write(self, concept: OKFConcept, overwrite: bool = True) -> Path:
        """Write one OKFConcept to disk. Returns the path written to.

        With ``overwrite=False`` an existing file is left untouched and its
        path is returned without updating the cache.
        """
        from pyokf.generator import generate_concept

        out_path = self.directory / concept.output_path
        out_path.parent.mkdir(parents=True, exist_ok=True)

        if not overwrite and out_path.exists():
            return out_path

        out_path.write_text(generate_concept(concept), encoding="utf-8")
        self._concepts[concept.output_path] = concept
        return out_path

    def write_all(self, concepts: list[OKFConcept], overwrite: bool = True) -> list[Path]:
        """Write multiple concepts, returning paths written."""
        return [self.write(c, overwrite=overwrite) for c in concepts]

    def needs_refresh(self, concept: OKFConcept, source_mtime: float) -> bool:
        """Return True if the source file is newer than the existing OKF concept timestamp.

        A concept with no cached counterpart always needs a refresh.
        ``source_mtime`` is a POSIX timestamp; naive cached timestamps are
        treated as UTC.
        """
        existing = self._concepts.get(concept.output_path)
        if existing is None:
            return True
        ts = existing.timestamp
        if ts.tzinfo is None:
            ts = ts.replace(tzinfo=datetime.timezone.utc)
        source_dt = datetime.datetime.fromtimestamp(source_mtime, tz=datetime.timezone.utc)
        return source_dt > ts

    def validate_file(self, md_file: Path) -> list[str]:
        """Validate a single .md file against the OKF spec. Returns list of error strings.

        Structural problems (unreadable file, missing or malformed
        frontmatter, invalid YAML) are reported as one error and end
        validation.  Otherwise every missing required field and an invalid
        ``type`` are reported.  An empty list means the file is valid.
        """
        errors: list[str] = []
        try:
            content = md_file.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as e:
            # Non-UTF-8 content is a validation failure, not a crash.
            return [f"{md_file}: Cannot read file: {e}"]

        if not content.startswith("---"):
            return [f"{md_file}: Missing YAML frontmatter (file must start with ---)"]

        split = self._split_frontmatter(content)
        if split is None:
            return [f"{md_file}: Malformed frontmatter (missing closing ---)"]

        try:
            fm = yaml.safe_load(split[0])
        except yaml.YAMLError as e:
            return [f"{md_file}: Invalid YAML: {e}"]

        if not isinstance(fm, dict):
            return [f"{md_file}: Frontmatter must be a YAML mapping"]

        for field in sorted(REQUIRED_FRONTMATTER - set(fm.keys())):
            errors.append(f"{md_file}: Missing required field '{field}'")

        if "type" in fm:
            declared_type = fm["type"]
            # Check the kind first: an unhashable value (list or mapping)
            # would raise TypeError in the frozenset lookup.
            if not isinstance(declared_type, str) or declared_type not in VALID_TYPES:
                errors.append(
                    f"{md_file}: Invalid type '{declared_type}'. "
                    f"Must be one of: {sorted(VALID_TYPES)}"
                )

        return errors

    def validate_directory(self) -> dict[str, list[str]]:
        """Validate all .md files in the bundle directory. Returns filepath -> errors dict.

        Only files with at least one error appear in the result.
        """
        results: dict[str, list[str]] = {}
        if not self.directory.exists():
            return results
        for md_file in sorted(self.directory.glob("**/*.md")):
            errs = self.validate_file(md_file)
            if errs:
                results[str(md_file)] = errs
        return results

    @property
    def concepts(self) -> list[OKFConcept]:
        """Snapshot list of the concepts currently cached in this bundle."""
        return list(self._concepts.values())

    def ensure_directory(self) -> None:
        """Create the bundle directory if it does not exist."""
        self.directory.mkdir(parents=True, exist_ok=True)
pyokf/cli.py ADDED
@@ -0,0 +1,146 @@
1
+ """CLI entry point for py-okf."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import sys
7
+ from pathlib import Path
8
+
9
+ from pyokf.analyzer import analyze_project
10
+ from pyokf.bundle import OKFBundle
11
+ from pyokf.generator import generate_module_concepts
12
+
13
+
14
def _add_common_args(parser: argparse.ArgumentParser) -> None:
    """Register the arguments shared by every sub-command.

    Adds the positional project ``path`` and the ``-o/--output`` option,
    which defaults to ``.okf``.
    """
    output_settings = {
        "default": ".okf",
        "metavar": "DIR",
        "help": "Output directory for OKF files (default: .okf)",
    }
    parser.add_argument("path", metavar="PATH", help="Path to the Python project root")
    parser.add_argument("-o", "--output", **output_settings)
22
+
23
+
24
def cmd_generate(args: argparse.Namespace) -> int:
    """Handle ``okf generate``: analyze the project and write one OKF file per concept.

    Args:
        args: Parsed CLI namespace with ``path``, ``output`` and optionally
            ``include_private``.

    Returns:
        Process exit code: ``1`` if the project path does not exist, else ``0``.
    """
    project_root = Path(args.path).resolve()
    if not project_root.exists():
        print(f"error: path does not exist: {project_root}", file=sys.stderr)
        return 1

    okf_dir = (project_root / args.output).resolve()
    bundle = OKFBundle(okf_dir)
    bundle.ensure_directory()

    def _display(path: Path) -> Path:
        # ``-o`` may point outside the project (absolute path or ``..``);
        # ``relative_to`` raises ValueError there, so show the full path.
        try:
            return path.relative_to(project_root)
        except ValueError:
            return path

    include_private = getattr(args, "include_private", False)
    modules = analyze_project(project_root, include_private=include_private)

    if not modules:
        print(f"No Python files found in: {project_root}")
        return 0

    total = 0
    for module in modules:
        concepts = generate_module_concepts(module, include_private=include_private)
        paths = bundle.write_all(concepts)
        for p in paths:
            print(f" generated: {_display(p)}")
        total += len(paths)

    print(f"\nGenerated {total} OKF file(s) in {_display(okf_dir)}/")
    return 0
51
+
52
+
53
def cmd_refresh(args: argparse.Namespace) -> int:
    """Handle ``okf refresh``: rewrite only concepts whose source file is newer.

    Args:
        args: Parsed CLI namespace with ``path``, ``output`` and optionally
            ``include_private``.

    Returns:
        Process exit code: ``1`` if the project path does not exist, else ``0``.
    """
    project_root = Path(args.path).resolve()
    if not project_root.exists():
        print(f"error: path does not exist: {project_root}", file=sys.stderr)
        return 1

    okf_dir = (project_root / args.output).resolve()
    bundle = OKFBundle(okf_dir).load()

    include_private = getattr(args, "include_private", False)
    modules = analyze_project(project_root, include_private=include_private)

    refreshed = 0
    skipped = 0
    for module in modules:
        source_path = project_root / module.source_file
        if not source_path.exists():
            # For files under src/, the analyzer records paths relative to
            # src/ rather than the project root, so retry there.
            source_path = project_root / "src" / module.source_file
        source_mtime = source_path.stat().st_mtime
        concepts = generate_module_concepts(module, include_private=include_private)
        for concept in concepts:
            if bundle.needs_refresh(concept, source_mtime):
                bundle.write(concept, overwrite=True)
                print(f" refreshed: {concept.output_path}")
                refreshed += 1
            else:
                skipped += 1

    print(f"\nRefreshed {refreshed}, skipped {skipped} (up-to-date) OKF file(s).")
    return 0
80
+
81
+
82
def cmd_validate(args: argparse.Namespace) -> int:
    """Handle ``okf validate``: check every ``.md`` file in the OKF directory.

    Prints each validation error to stderr.  Returns ``0`` when no file has
    errors, ``1`` when errors were found or the OKF directory is missing.
    """
    okf_dir = (Path(args.path).resolve() / args.output).resolve()

    if not okf_dir.exists():
        print(f"error: OKF directory does not exist: {okf_dir}", file=sys.stderr)
        return 1

    results = OKFBundle(okf_dir).validate_directory()

    if results:
        messages = [message for file_errors in results.values() for message in file_errors]
        for message in messages:
            print(f" {message}", file=sys.stderr)
        print(f"\n{len(messages)} validation error(s) found.", file=sys.stderr)
        return 1

    total = sum(1 for _ in okf_dir.glob("**/*.md"))
    print(f"All {total} OKF file(s) are valid.")
    return 0
106
+
107
+
108
def build_parser() -> argparse.ArgumentParser:
    """Assemble the ``okf`` command-line parser.

    A sub-command is mandatory.  Each of ``generate``, ``refresh`` and
    ``validate`` stores its handler as the ``func`` default so the caller
    can dispatch with ``args.func(args)``.
    """
    root = argparse.ArgumentParser(
        prog="okf",
        description="py-okf: Generate and manage Open Knowledge Format (OKF) files for Python projects.",
    )
    root.add_argument("--version", action="version", version="%(prog)s 0.1.0")

    commands = root.add_subparsers(dest="command", metavar="COMMAND")
    commands.required = True

    # generate
    generate = commands.add_parser("generate", help="Generate OKF files from Python source")
    _add_common_args(generate)
    generate.add_argument(
        "--include-private",
        action="store_true",
        help="Include private (underscore-prefixed) symbols",
    )
    generate.set_defaults(func=cmd_generate)

    # refresh
    refresh = commands.add_parser("refresh", help="Refresh stale OKF files")
    _add_common_args(refresh)
    refresh.add_argument("--include-private", action="store_true")
    refresh.set_defaults(func=cmd_refresh)

    # validate
    validate = commands.add_parser("validate", help="Validate OKF files against the spec")
    _add_common_args(validate)
    validate.set_defaults(func=cmd_validate)

    return root
137
+
138
+
139
def main(argv: list[str] | None = None) -> None:
    """Console-script entry point: parse ``argv``, run the command, and exit.

    ``argv=None`` lets argparse read ``sys.argv``.  The process always
    terminates through ``sys.exit`` with the handler's return code.
    """
    namespace = build_parser().parse_args(argv)
    exit_code = namespace.func(namespace)
    sys.exit(exit_code)
143
+
144
+
145
+ if __name__ == "__main__":
146
+ main()
pyokf/generator.py ADDED
@@ -0,0 +1,214 @@
1
+ """OKF concept generator — converts analysis results to OKFConcept objects."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import datetime
6
+ from typing import Optional
7
+
8
+ import yaml
9
+
10
+ from pyokf.models import (
11
+ ClassInfo,
12
+ ConceptType,
13
+ FunctionInfo,
14
+ MethodInfo,
15
+ ModuleInfo,
16
+ OKFConcept,
17
+ )
18
+
19
+
20
def _now_utc() -> datetime.datetime:
    """Return the current moment as a timezone-aware UTC datetime."""
    utc = datetime.timezone.utc
    return datetime.datetime.now(utc)
22
+
23
+
24
def _okf_output_path(module_name: str, suffix: str = "") -> str:
    """Return the bundle-relative ``.md`` file name for a concept.

    The name is ``<module_name>.md``, or ``<module_name>.<suffix>.md`` when a
    non-empty ``suffix`` (a class or function name) is given.
    """
    stem = ".".join((module_name, suffix)) if suffix else module_name
    return stem + ".md"
27
+
28
+
29
def _render_method_section(method: MethodInfo) -> str:
    """Render one method as a markdown sub-section.

    The heading is the method signature in backticks, followed by the
    docstring and a decorator list when present.

    Signatures produced by the analyzer come from ``ast.unparse`` and already
    start with ``async def`` for coroutines, so the ``async`` keyword is only
    prepended when it is missing; previously it was emitted twice.
    """
    signature = method.signature
    if method.is_async and not signature.startswith("async "):
        signature = f"async {signature}"

    lines = [f"### `{signature}`"]
    if method.docstring:
        lines += ["", method.docstring]
    if method.decorators:
        lines += ["", f"**Decorators:** {', '.join(f'`@{d}`' for d in method.decorators)}"]
    return "\n".join(lines)
37
+
38
+
39
def generate_module_concept(module: ModuleInfo) -> OKFConcept:
    """Generate the OKF concept for the module-level file.

    The body lists the module's classes (each linked to its own concept
    file) and its public functions with the first line of their docstrings.
    The description is the module docstring, or a generic fallback.
    """
    description = module.docstring or f"Python module: {module.name}"

    lines = [f"# {module.name}", ""]
    if module.docstring:
        lines += [module.docstring, ""]

    if module.classes:
        lines += ["## Classes", ""]
        for cls in module.classes:
            cls_path = _okf_output_path(module.name, cls.name)
            desc = f" — {cls.docstring.split(chr(10))[0]}" if cls.docstring else ""
            lines.append(f"- [`{cls.name}`]({cls_path}){desc}")
        lines.append("")

    pub_funcs = module.public_functions
    if pub_funcs:
        lines += ["## Functions", ""]
        for fn in pub_funcs:
            # Analyzer signatures already begin with "async def"; add the
            # keyword only when missing so it is never emitted twice.
            signature = fn.signature
            if fn.is_async and not signature.startswith("async "):
                signature = f"async {signature}"
            exported_badge = " _(exported)_" if fn.is_exported else ""
            lines.append(f"- `{signature}`{exported_badge}")
            if fn.docstring:
                short_doc = fn.docstring.split("\n")[0]
                lines.append(f" {short_doc}")
        lines.append("")

    return OKFConcept(
        type=ConceptType.MODULE,
        title=module.name,
        description=description,
        resource=f"./{module.source_file}",
        output_path=_okf_output_path(module.name),
        tags=["python", "module"],
        timestamp=_now_utc(),
        content="\n".join(lines),
    )
77
+
78
+
79
def generate_class_concept(cls: ClassInfo) -> OKFConcept:
    """Generate the OKF concept for a class.

    The body contains, in order and only when non-empty: base classes,
    decorators, the docstring, annotated attributes, the constructor, public
    methods, and special (dunder) methods other than ``__init__``.
    """

    def _method_block(heading: str, methods: list[MethodInfo]) -> list[str]:
        # A heading followed by one rendered section per method, or nothing.
        if not methods:
            return []
        block = [heading, ""]
        for entry in methods:
            block.extend((_render_method_section(entry), ""))
        return block

    summary = cls.docstring or f"Python class: {cls.name}"

    body = [f"# {cls.name}", ""]
    if cls.bases:
        inherited = ", ".join(f"`{base}`" for base in cls.bases)
        body.extend((f"**Inherits from:** {inherited}", ""))
    if cls.decorators:
        applied = ", ".join(f"`@{deco}`" for deco in cls.decorators)
        body.extend((f"**Decorators:** {applied}", ""))
    if cls.docstring:
        body.extend((cls.docstring, ""))

    if cls.attributes:
        body.extend(("## Attributes", ""))
        for attribute in cls.attributes:
            annotation_part = f": {attribute.annotation}" if attribute.annotation else ""
            default_part = "" if attribute.default is None else f" = {attribute.default}"
            body.append(f"- `{attribute.name}{annotation_part}{default_part}`")
        body.append("")

    constructor = next((m for m in cls.methods if m.name == "__init__"), None)
    if constructor:
        body.extend(("## Constructor", "", _render_method_section(constructor), ""))

    body += _method_block(
        "## Methods",
        [m for m in cls.public_methods if m.name != "__init__"],
    )
    body += _method_block(
        "## Special Methods",
        [m for m in cls.dunder_methods if m.name != "__init__"],
    )

    return OKFConcept(
        type=ConceptType.CLASS,
        title=f"{cls.module_name}.{cls.name}",
        description=summary,
        resource=f"./{cls.source_file}",
        output_path=_okf_output_path(cls.module_name, cls.name),
        tags=["python", "class"],
        timestamp=_now_utc(),
        content="\n".join(body),
    )
125
+
126
+
127
def generate_function_concept(fn: FunctionInfo) -> OKFConcept:
    """Generate the OKF concept for a top-level function.

    The body contains the docstring, a fenced signature, and (when present)
    the parameter list, return annotation and decorators.  The concept type
    and second tag come from ``fn.concept_type``.
    """
    description = fn.docstring or f"Python function: {fn.name}"
    concept_type = fn.concept_type

    # Analyzer signatures already begin with "async def"; add the keyword
    # only when missing so the rendered signature never reads "async async def".
    signature = fn.signature
    if fn.is_async and not signature.startswith("async "):
        signature = f"async {signature}"

    lines = [f"# {fn.name}", ""]
    if fn.docstring:
        lines += [fn.docstring, ""]

    lines += ["## Signature", "", "```python", signature, "```", ""]

    if fn.args:
        lines += ["## Parameters", ""]
        for arg in fn.args:
            ann = f": `{arg.annotation}`" if arg.annotation else ""
            default = f" *(default: `{arg.default}`)*" if arg.default is not None else ""
            kind_note = ""
            if arg.kind == "keyword_only":
                kind_note = " *(keyword-only)*"
            elif arg.kind == "var_positional":
                kind_note = " *(variadic)*"
            elif arg.kind == "var_keyword":
                kind_note = " *(keyword variadic)*"
            lines.append(f"- **`{arg.name}`**{ann}{default}{kind_note}")
        lines.append("")

    if fn.returns:
        lines += ["## Returns", "", f"`{fn.returns}`", ""]

    if fn.decorators:
        lines += ["## Decorators", ""]
        for d in fn.decorators:
            lines.append(f"- `@{d}`")
        lines.append("")

    return OKFConcept(
        type=concept_type,
        title=f"{fn.module_name}.{fn.name}",
        description=description,
        resource=f"./{fn.source_file}",
        output_path=_okf_output_path(fn.module_name, fn.name),
        tags=["python", concept_type.value],
        timestamp=_now_utc(),
        content="\n".join(lines),
    )
173
+
174
+
175
def generate_module_concepts(
    module: ModuleInfo,
    include_private: bool = False,
    generate_functions: bool = True,
    generate_classes: bool = True,
) -> list[OKFConcept]:
    """Generate all OKF concepts for a module and its contents.

    The module concept always comes first, then one concept per class (if
    ``generate_classes``), then one per function (if ``generate_functions``).
    Non-public functions are included only with ``include_private``.
    """
    collected: list[OKFConcept] = [generate_module_concept(module)]

    if generate_classes:
        collected.extend(generate_class_concept(cls) for cls in module.classes)

    if generate_functions:
        collected.extend(
            generate_function_concept(fn)
            for fn in module.functions
            if include_private or fn.is_public
        )

    return collected
194
+
195
+
196
def generate_concept(concept: OKFConcept) -> str:
    """Render an OKFConcept to its full markdown string (frontmatter + body).

    The frontmatter holds the core fields in a fixed order followed by any
    ``extra_frontmatter`` entries.

    The timestamp is written with a literal ``Z`` suffix, so it is converted
    to UTC first; an aware non-UTC datetime was previously written with its
    local wall-clock time mislabelled as UTC.  Naive datetimes are taken to
    be UTC already.
    """
    stamp = concept.timestamp
    if stamp.tzinfo is None:
        stamp = stamp.replace(tzinfo=datetime.timezone.utc)
    else:
        stamp = stamp.astimezone(datetime.timezone.utc)

    frontmatter: dict = {
        "type": concept.type.value,
        "title": concept.title,
        "description": concept.description,
        "resource": concept.resource,
        "tags": concept.tags,
        "timestamp": stamp.strftime("%Y-%m-%dT%H:%M:%SZ"),
    }
    frontmatter.update(concept.extra_frontmatter)

    fm_str = yaml.dump(
        frontmatter,
        default_flow_style=False,
        allow_unicode=True,
        sort_keys=False,
    )
    return f"---\n{fm_str}---\n\n{concept.content.strip()}\n"
pyokf/models.py ADDED
@@ -0,0 +1,131 @@
1
+ """Data models for py-okf."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import datetime
6
+ from dataclasses import dataclass, field
7
+ from enum import Enum
8
+ from typing import Optional
9
+
10
+
11
class ConceptType(Enum):
    """Kinds of OKF concept that a Python construct can be rendered as.

    Every member's value is a lowercase string.
    """

    # A module file.
    MODULE = "module"

    # A class definition.
    CLASS = "class"

    # A top-level function that is not marked as exported.
    FUNCTION = "function"

    # A top-level function marked as exported (see FunctionInfo.concept_type).
    API = "api"
17
+
18
+
19
@dataclass
class OKFConcept:
    """One OKF document: a markdown body plus the data for its YAML frontmatter."""

    # --- required ---------------------------------------------------------
    type: ConceptType
    title: str
    description: str
    resource: str
    # presumably the location the rendered file is written to; verify
    # against the code that consumes it
    output_path: str

    # --- optional ---------------------------------------------------------
    # A fresh list per instance.
    tags: list[str] = field(default_factory=list)
    # Defaults to the current time as a timezone-aware UTC datetime.
    timestamp: datetime.datetime = field(
        default_factory=lambda: datetime.datetime.now(datetime.timezone.utc)
    )
    # Markdown body; empty unless supplied.
    content: str = ""
    # Additional frontmatter entries; a fresh dict per instance.
    extra_frontmatter: dict = field(default_factory=dict)
33
+
34
+
35
@dataclass
class ArgumentInfo:
    """A single parameter taken from a function or method signature."""

    # Parameter name.
    name: str
    # Annotation as text, or None when absent.
    annotation: Optional[str] = None
    # Default value as text, or None when absent.
    default: Optional[str] = None
    # Parameter kind label; "positional" unless stated otherwise.
    kind: str = "positional"
42
+
43
+
44
@dataclass
class AttributeInfo:
    """An annotated attribute declared at class level."""

    # Attribute name.
    name: str
    # Annotation as text, or None when absent.
    annotation: Optional[str] = None
    # Default value as text, or None when absent.
    default: Optional[str] = None
50
+
51
+
52
@dataclass
class MethodInfo:
    """A method belonging to a class."""

    name: str
    signature: str
    docstring: Optional[str] = None
    # Fresh list per instance.
    decorators: list[str] = field(default_factory=list)
    is_async: bool = False
    # Fresh list per instance.
    args: list[ArgumentInfo] = field(default_factory=list)
    returns: Optional[str] = None
    line_number: int = 0

    @property
    def is_public(self) -> bool:
        """Whether the name has no leading underscore."""
        if self.name.startswith("_"):
            return False
        return True

    @property
    def is_dunder(self) -> bool:
        """Whether the name both begins and ends with a double underscore."""
        label = self.name
        return label[:2] == "__" and label[-2:] == "__"
71
+
72
+
73
@dataclass
class FunctionInfo:
    """A function defined at the top level of a module."""

    name: str
    module_name: str
    source_file: str
    signature: str
    docstring: Optional[str] = None
    # Fresh list per instance.
    decorators: list[str] = field(default_factory=list)
    is_async: bool = False
    # Fresh list per instance.
    args: list[ArgumentInfo] = field(default_factory=list)
    returns: Optional[str] = None
    # Selects between the API and FUNCTION concept types below.
    is_exported: bool = False
    line_number: int = 0

    @property
    def is_public(self) -> bool:
        """Whether the name has no leading underscore."""
        if self.name.startswith("_"):
            return False
        return True

    @property
    def concept_type(self) -> ConceptType:
        """``ConceptType.API`` when exported, ``ConceptType.FUNCTION`` otherwise."""
        if self.is_exported:
            return ConceptType.API
        return ConceptType.FUNCTION
95
+
96
+
97
@dataclass
class ClassInfo:
    """A class definition extracted from the AST."""

    name: str
    module_name: str
    source_file: str
    docstring: Optional[str] = None
    # Each list field below gets a fresh list per instance.
    bases: list[str] = field(default_factory=list)
    attributes: list[AttributeInfo] = field(default_factory=list)
    methods: list[MethodInfo] = field(default_factory=list)
    decorators: list[str] = field(default_factory=list)
    line_number: int = 0

    @property
    def public_methods(self) -> list[MethodInfo]:
        """Methods whose ``is_public`` is true, in their original order."""
        return list(filter(lambda method: method.is_public, self.methods))

    @property
    def dunder_methods(self) -> list[MethodInfo]:
        """Methods whose ``is_dunder`` is true, in their original order."""
        return list(filter(lambda method: method.is_dunder, self.methods))
117
+
118
+
119
@dataclass
class ModuleInfo:
    """The analysis result for one Python module file."""

    name: str
    source_file: str
    docstring: Optional[str] = None
    # Each list field below gets a fresh list per instance.
    classes: list[ClassInfo] = field(default_factory=list)
    functions: list[FunctionInfo] = field(default_factory=list)
    exports: list[str] = field(default_factory=list)

    @property
    def public_functions(self) -> list[FunctionInfo]:
        """Functions whose ``is_public`` is true, in their original order."""
        return list(filter(lambda func: func.is_public, self.functions))