PyPI - manualforge - Versions diffs - 0.1.1__tar.gz - Mend

manualforge 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

manualforge-0.1.1/PKG-INFO ADDED Viewed

@@ -0,0 +1,236 @@
+Metadata-Version: 2.4
+Name: manualforge
+Version: 0.1.1
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+Requires-Dist: kedro~=1.2.0
+Requires-Dist: kedro-datasets~=9.2
+Requires-Dist: polars>=1.0
+Requires-Dist: polars-runtime-compat>=1.38.1
+Requires-Dist: fastexcel~=0.19
+Requires-Dist: Jinja2>=3.0
+Requires-Dist: duckdb>=1.0
+Provides-Extra: docs
+Requires-Dist: docutils<0.21; extra == "docs"
+Requires-Dist: sphinx<7.3,>=5.3; extra == "docs"
+Requires-Dist: sphinx_rtd_theme==2.0.0; extra == "docs"
+Requires-Dist: nbsphinx==0.8.1; extra == "docs"
+Requires-Dist: sphinx-autodoc-typehints==1.20.2; extra == "docs"
+Requires-Dist: sphinx_copybutton==0.5.2; extra == "docs"
+Requires-Dist: ipykernel<7.0,>=5.3; extra == "docs"
+Requires-Dist: Jinja2<3.2.0; extra == "docs"
+Requires-Dist: myst-parser<2.1,>=1.0; extra == "docs"
+Provides-Extra: dev
+Requires-Dist: ipython>=8.10; extra == "dev"
+Requires-Dist: jupyterlab>=3.0; extra == "dev"
+Requires-Dist: notebook; extra == "dev"
+Requires-Dist: pytest-cov<7,>=3; extra == "dev"
+Requires-Dist: pytest-mock<2.0,>=1.7.1; extra == "dev"
+Requires-Dist: pytest~=9.0; extra == "dev"
+Requires-Dist: ruff~=0.15.0; extra == "dev"
+# ManualForge
+> **Configuration-driven management manual generation framework.**
+> Define your data sources, fields, and templates in YAML — get a formatted report.
+Built on [Kedro](https://kedro.org) pipelines with [Polars](https://pola.rs) for data processing and [Typst](https://typst.app) for document rendering.
+## Philosophy
+ManualForge separates **what** you want to produce from **how** it's produced.
+- **What**: Defined in `conf/base/parameters_manualforge.yml` — your data sources, expected columns, standardization rules, sort orders, summary dimensions, and report templates.
+- **How**: Implemented by the pipeline nodes — reusable data processing functions that read from your config.
+To create a new manual for a different domain, you only need to edit the config file (and optionally provide new templates). No Python code changes are required.
+## Features
+| Capability | Description |
+|---|---|
+| **Multi-sheet Excel ingestion** | Auto-detect headers, filter cover sheets, merge into structured DataFrames |
+| **Field standardization** | Mapping files + exact matching + fuzzy matching (difflib / duckdb) |
+| **Config-driven summaries** | Define group-by dimensions, sort orders, ability categories, and output paths in YAML |
+| **Typst report generation** | Jinja2 templates → Typst source → PDF compilation |
+| **Pipeline hooks** | Shell command hooks at pipeline/node granularity for pre/post processing |
+## Quick Start
+```bash
+# 1. Install dependencies
+pip install -r requirements.txt
+# 2. Copy and customize configuration
+cp conf/examples/parameters_manualforge.yml.example conf/base/parameters_manualforge.yml
+cp conf/examples/catalog.yml.example          conf/base/catalog.yml
+cp conf/examples/hooks.yml.example            conf/base/hooks.yml
+cp conf/examples/parameters.yml.example       conf/base/parameters.yml
+cp conf/examples/credentials.yml.example      conf/local/credentials.yml
+# 3. Edit the config files to point to your data sources
+#    (conf/base/ is gitignored — your real configs stay local)
+# 4. Run the pipeline
+kedro run
+# Run specific node groups
+kedro run --tags conversion        # Excel → Parquet only
+kedro run --tags standardization   # Standardization only
+kedro run --tags csv               # Summary tables only
+```
+## Project Structure
+```
+├── conf/
+│   ├── base/                          # ★ Gitignored — copy from examples/
+│   │   ├── parameters_manualforge.yml # Central project configuration
+│   │   ├── catalog.yml                # Kedro data catalog
+│   │   ├── hooks.yml                  # Pipeline hooks (shell commands)
+│   │   └── parameters.yml             # Pipeline parameters
+│   ├── examples/                      # ★ Tracked example templates
+│   │   ├── parameters_manualforge.yml.example
+│   │   ├── catalog.yml.example
+│   │   ├── hooks.yml.example
+│   │   ├── parameters.yml.example
+│   │   └── credentials.yml.example
+│   ├── local/                         # Local-only (gitignored)
+│   │   └── credentials.yml
+│   └── logging.yml
+├── data/                              # Gitignored except .gitkeep
+│   ├── 01_raw/                        # Raw Excel/CSV + mapping files
+│   ├── 02_intermediate/              # Parquet, reconcile reports
+│   ├── 03_primary/                   # Standardized data
+│   ├── 04_feature/                   # Summary tables (CSV + Markdown)
+│   └── 08_reporting/                 # Typst sources & compiled PDFs
+├── scripts/                          # Auxiliary scripts
+│   ├── convert_csv_to_md.py          # CSV → Markdown conversion
+│   ├── extract_rule_field_mapping.py # Rule field extraction
+│   ├── extract_rule_overview.py      # Rule overview extraction
+│   └── render_with_forge.py          # Markdown → DOCX/PDF rendering
+├── src/manualforge/                  # Framework source code
+│   ├── config.py                     # Configuration helper utilities
+│   ├── hooks.py                      # Kedro pipeline hooks
+│   ├── io/                           # Custom Kedro datasets (PolarsExcelDataset)
+│   ├── pipelines/                    # Pipeline definitions & node functions
+│   └── settings.py                   # Kedro project settings
+├── templates/                        # Jinja2 Typst templates
+│   └── report.typ.j2
+├── pyproject.toml                    # Project metadata & dependencies
+└── requirements.txt
+```
+## Configuration Guide
+The central configuration file is `conf/base/parameters_manualforge.yml`. Copy from `conf/examples/` and customize:
+### 1. Data Sources
+Define your Excel files, expected headers, and sheet filtering rules:
+```yaml
+datasources:
+  primary_data:
+    filepath: "data/01_raw/your_data.xlsx"
+    sheet:
+      exclude_names: ["封面", "封皮"]
+      name_becomes_column: "sheet_name"
+    header_detection:
+      mode: keyword_match
+      expected_headers:
+        - "column_a"
+        - "column_b"
+    cleaning:
+      drop_rows_where:
+        column_a: ["column_a"]   # drop residual header rows
+      fill_null: forward
+      deduplicate: true
+```
+### 2. Field Standardization
+Define which fields to standardize, their mapping files, and special corrections:
+```yaml
+standardization:
+  fields:
+    - name: "dept_name"
+      mapping_file: "data/01_raw/dept_list"
+      case_corrections:
+        wrong_name: "correct_name"
+      special_mappings:
+        alias: "canonical_name"
+      fuzzy:
+        enabled: true
+        threshold: 0.8
+        method: difflib             # difflib | duckdb
+```
+### 3. Sort Orders
+Define reusable sort order lists referenced by summaries:
+```yaml
+sort_orders:
+  model_names:
+    - "Model A"
+    - "Model B"
+  dep_names:
+    - "HR"
+    - "Finance"
+```
+### 4. Summaries
+Define what summary tables to generate:
+```yaml
+summaries:
+  my_summary:
+    description: "Fields grouped by model and department"
+    group_by: ["model", "department"]
+    struct_columns: ["module", "system", "field_name"]
+    sort_by:
+      department: dep_names
+    output:
+      csv: "data/04_feature/my_summary.csv"
+```
+### 5. Reports
+Define report templates and output:
+```yaml
+reports:
+  my_report:
+    description: "Rules cookbook"
+    template_source: inline
+    data_source: rules_data
+    output_typ: "data/08_reporting/output.typ"
+    typst_compile:
+      enabled: true
+```
+## Data Layers
+| Layer | Directory | Description |
+|---|---|---|
+| Raw | `data/01_raw/` | Source Excel/CSV files, mapping files |
+| Intermediate | `data/02_intermediate/` | Parquet, reconcile reports |
+| Primary | `data/03_primary/` | Standardized data |
+| Feature | `data/04_feature/` | Summary tables (CSV + Markdown) |
+| Reporting | `data/08_reporting/` | Typst sources & PDF output |
+## Requirements
+- Python >= 3.10
+- [Typst](https://github.com/typst/typst) CLI (for PDF compilation)
+## Development
+```bash
+pip install -e ".[dev]"
+ruff check src/
+pytest
+```

manualforge-0.1.1/README.md ADDED Viewed

@@ -0,0 +1,205 @@
+# ManualForge
+> **Configuration-driven management manual generation framework.**
+> Define your data sources, fields, and templates in YAML — get a formatted report.
+Built on [Kedro](https://kedro.org) pipelines with [Polars](https://pola.rs) for data processing and [Typst](https://typst.app) for document rendering.
+## Philosophy
+ManualForge separates **what** you want to produce from **how** it's produced.
+- **What**: Defined in `conf/base/parameters_manualforge.yml` — your data sources, expected columns, standardization rules, sort orders, summary dimensions, and report templates.
+- **How**: Implemented by the pipeline nodes — reusable data processing functions that read from your config.
+To create a new manual for a different domain, you only need to edit the config file (and optionally provide new templates). No Python code changes are required.
+## Features
+| Capability | Description |
+|---|---|
+| **Multi-sheet Excel ingestion** | Auto-detect headers, filter cover sheets, merge into structured DataFrames |
+| **Field standardization** | Mapping files + exact matching + fuzzy matching (difflib / duckdb) |
+| **Config-driven summaries** | Define group-by dimensions, sort orders, ability categories, and output paths in YAML |
+| **Typst report generation** | Jinja2 templates → Typst source → PDF compilation |
+| **Pipeline hooks** | Shell command hooks at pipeline/node granularity for pre/post processing |
+## Quick Start
+```bash
+# 1. Install dependencies
+pip install -r requirements.txt
+# 2. Copy and customize configuration
+cp conf/examples/parameters_manualforge.yml.example conf/base/parameters_manualforge.yml
+cp conf/examples/catalog.yml.example          conf/base/catalog.yml
+cp conf/examples/hooks.yml.example            conf/base/hooks.yml
+cp conf/examples/parameters.yml.example       conf/base/parameters.yml
+cp conf/examples/credentials.yml.example      conf/local/credentials.yml
+# 3. Edit the config files to point to your data sources
+#    (conf/base/ is gitignored — your real configs stay local)
+# 4. Run the pipeline
+kedro run
+# Run specific node groups
+kedro run --tags conversion        # Excel → Parquet only
+kedro run --tags standardization   # Standardization only
+kedro run --tags csv               # Summary tables only
+```
+## Project Structure
+```
+├── conf/
+│   ├── base/                          # ★ Gitignored — copy from examples/
+│   │   ├── parameters_manualforge.yml # Central project configuration
+│   │   ├── catalog.yml                # Kedro data catalog
+│   │   ├── hooks.yml                  # Pipeline hooks (shell commands)
+│   │   └── parameters.yml             # Pipeline parameters
+│   ├── examples/                      # ★ Tracked example templates
+│   │   ├── parameters_manualforge.yml.example
+│   │   ├── catalog.yml.example
+│   │   ├── hooks.yml.example
+│   │   ├── parameters.yml.example
+│   │   └── credentials.yml.example
+│   ├── local/                         # Local-only (gitignored)
+│   │   └── credentials.yml
+│   └── logging.yml
+├── data/                              # Gitignored except .gitkeep
+│   ├── 01_raw/                        # Raw Excel/CSV + mapping files
+│   ├── 02_intermediate/              # Parquet, reconcile reports
+│   ├── 03_primary/                   # Standardized data
+│   ├── 04_feature/                   # Summary tables (CSV + Markdown)
+│   └── 08_reporting/                 # Typst sources & compiled PDFs
+├── scripts/                          # Auxiliary scripts
+│   ├── convert_csv_to_md.py          # CSV → Markdown conversion
+│   ├── extract_rule_field_mapping.py # Rule field extraction
+│   ├── extract_rule_overview.py      # Rule overview extraction
+│   └── render_with_forge.py          # Markdown → DOCX/PDF rendering
+├── src/manualforge/                  # Framework source code
+│   ├── config.py                     # Configuration helper utilities
+│   ├── hooks.py                      # Kedro pipeline hooks
+│   ├── io/                           # Custom Kedro datasets (PolarsExcelDataset)
+│   ├── pipelines/                    # Pipeline definitions & node functions
+│   └── settings.py                   # Kedro project settings
+├── templates/                        # Jinja2 Typst templates
+│   └── report.typ.j2
+├── pyproject.toml                    # Project metadata & dependencies
+└── requirements.txt
+```
+## Configuration Guide
+The central configuration file is `conf/base/parameters_manualforge.yml`. Copy from `conf/examples/` and customize:
+### 1. Data Sources
+Define your Excel files, expected headers, and sheet filtering rules:
+```yaml
+datasources:
+  primary_data:
+    filepath: "data/01_raw/your_data.xlsx"
+    sheet:
+      exclude_names: ["封面", "封皮"]
+      name_becomes_column: "sheet_name"
+    header_detection:
+      mode: keyword_match
+      expected_headers:
+        - "column_a"
+        - "column_b"
+    cleaning:
+      drop_rows_where:
+        column_a: ["column_a"]   # drop residual header rows
+      fill_null: forward
+      deduplicate: true
+```
+### 2. Field Standardization
+Define which fields to standardize, their mapping files, and special corrections:
+```yaml
+standardization:
+  fields:
+    - name: "dept_name"
+      mapping_file: "data/01_raw/dept_list"
+      case_corrections:
+        wrong_name: "correct_name"
+      special_mappings:
+        alias: "canonical_name"
+      fuzzy:
+        enabled: true
+        threshold: 0.8
+        method: difflib             # difflib | duckdb
+```
+### 3. Sort Orders
+Define reusable sort order lists referenced by summaries:
+```yaml
+sort_orders:
+  model_names:
+    - "Model A"
+    - "Model B"
+  dep_names:
+    - "HR"
+    - "Finance"
+```
+### 4. Summaries
+Define what summary tables to generate:
+```yaml
+summaries:
+  my_summary:
+    description: "Fields grouped by model and department"
+    group_by: ["model", "department"]
+    struct_columns: ["module", "system", "field_name"]
+    sort_by:
+      department: dep_names
+    output:
+      csv: "data/04_feature/my_summary.csv"
+```
+### 5. Reports
+Define report templates and output:
+```yaml
+reports:
+  my_report:
+    description: "Rules cookbook"
+    template_source: inline
+    data_source: rules_data
+    output_typ: "data/08_reporting/output.typ"
+    typst_compile:
+      enabled: true
+```
+## Data Layers
+| Layer | Directory | Description |
+|---|---|---|
+| Raw | `data/01_raw/` | Source Excel/CSV files, mapping files |
+| Intermediate | `data/02_intermediate/` | Parquet, reconcile reports |
+| Primary | `data/03_primary/` | Standardized data |
+| Feature | `data/04_feature/` | Summary tables (CSV + Markdown) |
+| Reporting | `data/08_reporting/` | Typst sources & PDF output |
+## Requirements
+- Python >= 3.10
+- [Typst](https://github.com/typst/typst) CLI (for PDF compilation)
+## Development
+```bash
+pip install -e ".[dev]"
+ruff check src/
+pytest
+```

manualforge-0.1.1/pyproject.toml ADDED Viewed

@@ -0,0 +1,92 @@
+[build-system]
+requires = ["setuptools"]
+build-backend = "setuptools.build_meta"
+[project]
+requires-python = ">=3.10"
+name = "manualforge"
+readme = "README.md"
+dynamic = ["version"]
+dependencies = [
+    "kedro~=1.2.0",
+    "kedro-datasets~=9.2",
+    "polars>=1.0",
+    "polars-runtime-compat>=1.38.1",
+    "fastexcel~=0.19",
+    "Jinja2>=3.0",
+    "duckdb>=1.0",
+]
+[project.scripts]
+"manualforge" = "manualforge.__main__:main"
+[project.entry-points."kedro.hooks"]
+[project.optional-dependencies]
+docs = [
+    "docutils<0.21",
+    "sphinx>=5.3,<7.3",
+    "sphinx_rtd_theme==2.0.0",
+    "nbsphinx==0.8.1",
+    "sphinx-autodoc-typehints==1.20.2",
+    "sphinx_copybutton==0.5.2",
+    "ipykernel>=5.3, <7.0",
+    "Jinja2<3.2.0",
+    "myst-parser>=1.0,<2.1"
+]
+dev = [
+    "ipython>=8.10",
+    "jupyterlab>=3.0",
+    "notebook",
+    "pytest-cov>=3,<7",
+    "pytest-mock>=1.7.1, <2.0",
+    "pytest~=9.0",
+    "ruff~=0.15.0"
+]
+[tool.setuptools.dynamic]
+version = {attr = "manualforge.__version__"}
+[tool.setuptools.packages.find]
+where = ["src"]
+namespaces = false
+[tool.kedro]
+package_name = "manualforge"
+project_name = "manualforge"
+kedro_init_version = "1.1.1"
+tools = "['Linting', 'Testing', 'Custom Logging', 'Documentation', 'Data Structure']"
+example_pipeline = "False"
+source_dir = "src"
+[tool.pytest.ini_options]
+addopts = """
+--cov-report term-missing \
+--cov src/manualforge -ra"""
+[tool.coverage.report]
+fail_under = 0
+show_missing = true
+exclude_lines = ["pragma: no cover", "raise NotImplementedError"]
+[tool.ruff.format]
+docstring-code-format = true
+[tool.ruff]
+line-length = 88
+show-fixes = true
+[tool.ruff.lint]
+select = [
+    "F",   # Pyflakes
+    "W",   # pycodestyle warnings
+    "E",   # pycodestyle errors
+    "I",   # isort
+    "UP",  # pyupgrade
+    "PL",  # Pylint
+    "T201", # Print Statement
+]
+ignore = ["E501"]  # Ruff format takes care of line-too-long
+[tool.kedro_telemetry]
+project_id = "d5d6a6859fac4a9c899980b536809946"

manualforge-0.1.1/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

manualforge-0.1.1/src/manualforge/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""ManualForge — A configuration-driven management manual generation framework."""
+__version__ = "0.1.1"

manualforge-0.1.1/src/manualforge/__main__.py ADDED Viewed

@@ -0,0 +1,25 @@
+"""Entry point that makes the package executable
+as `manualforge` and `python -m manualforge`.
+"""
+import sys
+from pathlib import Path
+from typing import Any
+from kedro.framework.cli.utils import find_run_command
+from kedro.framework.project import configure_project
def main(*args, **kwargs) -> Any:
    """Configure the Kedro project and invoke its ``run`` command.

    The package name is taken from the directory that contains this file.
    All positional and keyword arguments are forwarded to the run command;
    ``standalone_mode`` is always overwritten before the call.

    Returns:
        Whatever the resolved run command returns.
    """
    pkg = Path(__file__).parent.name
    configure_project(pkg)

    # ``sys.ps1`` is only defined in an interactive interpreter, so
    # standalone mode is switched off there and on everywhere else.
    # NOTE(review): presumably this is click's ``standalone_mode`` flag — confirm.
    kwargs["standalone_mode"] = not hasattr(sys, "ps1")

    command = find_run_command(pkg)
    return command(*args, **kwargs)


if __name__ == "__main__":
    main()

manualforge-0.1.1/src/manualforge/config.py ADDED Viewed

@@ -0,0 +1,73 @@
+"""ManualForge configuration helpers.
+Utilities for safely reading project configuration and providing sensible
+defaults, so node functions stay clean when operating in config-driven mode.
+"""
+from __future__ import annotations
+import logging
+from typing import Any
+logger = logging.getLogger(__name__)
+# Sentinel meaning "not set", so that an explicit None can be told apart from it.
+_UNSET = object()
def get_datasource(config: dict, source_id: str) -> dict:
    """Return the datasource sub-config for *source_id*.

    Args:
        config: Project configuration mapping; its ``datasources`` key is read.
        source_id: Key of the datasource to look up.

    Returns:
        The value stored under ``config["datasources"][source_id]``.

    Raises:
        KeyError: If *source_id* is not defined, including when the
            ``datasources`` section is missing or empty.
    """
    # An empty YAML section (``datasources:``) loads as None rather than {},
    # which would make the membership test below raise TypeError instead of
    # the KeyError callers expect.
    sources = config.get("datasources") or {}
    if source_id not in sources:
        raise KeyError(f"Datasource '{source_id}' not found in config.datasources")
    return sources[source_id]
def get_standardization(config: dict) -> dict:
    """Return the standardization config section.

    Args:
        config: Project configuration mapping.

    Returns:
        The ``standardization`` mapping, or an empty dict when the section is
        missing or empty.
    """
    # ``or {}`` also covers a section that is present but loaded as None
    # (an empty YAML key), which ``.get(key, {})`` would pass through.
    return config.get("standardization") or {}


def get_standardization_fields(config: dict) -> list[dict]:
    """Return the list of field-standardization definitions.

    Args:
        config: Project configuration mapping.

    Returns:
        The ``standardization.fields`` list, or an empty list when either the
        section or the ``fields`` key is missing or empty.
    """
    return get_standardization(config).get("fields") or []
def get_summary(config: dict, summary_id: str) -> dict:
    """Return the summary sub-config for *summary_id*.

    Args:
        config: Project configuration mapping; its ``summaries`` key is read.
        summary_id: Key of the summary to look up.

    Returns:
        The value stored under ``config["summaries"][summary_id]``.

    Raises:
        KeyError: If *summary_id* is not defined, including when the
            ``summaries`` section is missing or empty.
    """
    # An empty YAML section loads as None; normalise it so the lookup fails
    # with the descriptive KeyError rather than a TypeError.
    summaries = config.get("summaries") or {}
    if summary_id not in summaries:
        raise KeyError(f"Summary '{summary_id}' not found in config.summaries")
    return summaries[summary_id]
def get_sort_order(config: dict, order_name: str) -> list[str]:
    """Return a named sort-order list.

    Args:
        config: Project configuration mapping; its ``sort_orders`` key is read.
        order_name: Name of the sort order to fetch.

    Returns:
        The list stored under ``config["sort_orders"][order_name]``, or an
        empty list when the section or the named order is missing or empty.
    """
    # Both the section and the individual entry may be None when the YAML key
    # is present but has no value, so each level is normalised.
    orders = config.get("sort_orders") or {}
    return orders.get(order_name) or []


def _resolve_sort_ref(config: dict, ref: str | list) -> list[str]:
    """Resolve a sort_by value which is either a sort-order name or an inline list.

    A list is returned unchanged, a string is looked up via
    :func:`get_sort_order`, and any other value resolves to an empty list.
    """
    if isinstance(ref, list):
        return ref
    if isinstance(ref, str):
        return get_sort_order(config, ref)
    return []


def get_sort_list(config: dict, sort_by: dict) -> dict[str, list[str]]:
    """Resolve a sort_by dict {column: order_name_or_list} → {column: [values]}.

    Args:
        config: Project configuration mapping used to resolve named orders.
        sort_by: Mapping of column name to a sort-order name or an inline
            list. ``None`` (an empty ``sort_by:`` key) is treated as empty.

    Returns:
        A new dict mapping every column in *sort_by* to its resolved list.
    """
    return {col: _resolve_sort_ref(config, ref) for col, ref in (sort_by or {}).items()}
def get_report(config: dict, report_id: str) -> dict:
    """Return the report sub-config for *report_id*.

    Args:
        config: Project configuration mapping; its ``reports`` key is read.
        report_id: Key of the report to look up.

    Returns:
        The value stored under ``config["reports"][report_id]``.

    Raises:
        KeyError: If *report_id* is not defined, including when the
            ``reports`` section is missing or empty.
    """
    # An empty YAML section loads as None; normalise it so the lookup fails
    # with the descriptive KeyError rather than a TypeError.
    reports = config.get("reports") or {}
    if report_id not in reports:
        raise KeyError(f"Report '{report_id}' not found in config.reports")
    return reports[report_id]
def get_project(config: dict) -> dict:
    """Return the project metadata section.

    Args:
        config: Project configuration mapping.

    Returns:
        The ``project`` mapping, or an empty dict when the section is missing
        or empty.
    """
    # ``or {}`` keeps the declared ``dict`` return type even when the key is
    # present but loaded as None (an empty YAML section).
    return config.get("project") or {}