govio 0.2.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. govio-0.2.11/.gitignore +23 -0
  2. govio-0.2.11/AGENTS.md +251 -0
  3. govio-0.2.11/CLAUDE.md +71 -0
  4. govio-0.2.11/LICENSE +21 -0
  5. govio-0.2.11/PKG-INFO +473 -0
  6. govio-0.2.11/README.md +447 -0
  7. govio-0.2.11/docs/connect.json +9 -0
  8. govio-0.2.11/docs/design-thoughts.md +42 -0
  9. govio-0.2.11/docs/eval-skill.md +439 -0
  10. govio-0.2.11/docs/metric/Design.md +63 -0
  11. govio-0.2.11/docs/ontology.md +105 -0
  12. govio-0.2.11/docs/plans/2026-03-13-relationship-json-reader-design.md +136 -0
  13. govio-0.2.11/docs/plans/2026-03-13-relationship-json-reader.md +879 -0
  14. govio-0.2.11/docs/plans/2026-03-18-mcp-data-exploration-plan.md +1956 -0
  15. govio-0.2.11/docs/plans/2026-03-31-config-refactor-design.md +73 -0
  16. govio-0.2.11/docs/plans/2026-03-31-config-refactor-plan.md +414 -0
  17. govio-0.2.11/docs/plans/2026-03-31-unified-query-entry-design.md +72 -0
  18. govio-0.2.11/docs/plans/2026-03-31-unified-query-entry.md +226 -0
  19. govio-0.2.11/docs/plans/2026-04-02-metadata-onboard-implementation.md +287 -0
  20. govio-0.2.11/docs/plans/2026-04-02-metadata-onboard-redesign.md +61 -0
  21. govio-0.2.11/docs/plans/2026-04-02-onboard-wizard-design.md +439 -0
  22. govio-0.2.11/docs/plans/2026-04-02-onboard-wizard-implementation.md +1076 -0
  23. govio-0.2.11/docs/plans/2026-04-17-onboard-datasource-improvement-design.md +159 -0
  24. govio-0.2.11/docs/plans/2026-04-17-onboard-datasource-improvement.md +403 -0
  25. govio-0.2.11/docs/roadmap.md +85 -0
  26. govio-0.2.11/docs/self-service-data-analytics-with-govio.md +309 -0
  27. govio-0.2.11/docs/semantic-layer.md +152 -0
  28. govio-0.2.11/docs/specs/README.md +37 -0
  29. govio-0.2.11/docs/specs/cli.md +119 -0
  30. govio-0.2.11/docs/specs/config.md +101 -0
  31. govio-0.2.11/docs/specs/core.md +60 -0
  32. govio-0.2.11/docs/specs/data-model.md +69 -0
  33. govio-0.2.11/docs/specs/data_standard/data_standard_recommendation.md +417 -0
  34. govio-0.2.11/docs/specs/data_standard/recommender_usage.md +391 -0
  35. govio-0.2.11/docs/specs/graph.md +85 -0
  36. govio-0.2.11/docs/specs/metadata.md +287 -0
  37. govio-0.2.11/docs/specs/observe-data.md +175 -0
  38. govio-0.2.11/docs/superpowers/plans/2026-04-15-observe-cli-design.md +739 -0
  39. govio-0.2.11/docs/superpowers/plans/2026-05-28-metadata-loader-fusion.md +722 -0
  40. govio-0.2.11/docs/superpowers/plans/2026-06-09-config-restructure.md +646 -0
  41. govio-0.2.11/docs/superpowers/specs/2026-05-28-metadata-loader-fusion-design.md +208 -0
  42. govio-0.2.11/docs/superpowers/specs/2026-06-09-config-restructure-design.md +134 -0
  43. govio-0.2.11/docs/test_case_fix_todo.md +14 -0
  44. govio-0.2.11/docs/数据探源作业方法.md +410 -0
  45. govio-0.2.11/pyproject.toml +48 -0
  46. govio-0.2.11/skills/govio/SKILL.md +100 -0
  47. govio-0.2.11/skills/govio/eval.md +762 -0
  48. govio-0.2.11/skills/govio/govio-metadata/SKILL.md +81 -0
  49. govio-0.2.11/skills/govio/govio-metadata/reference-falkordb.md +166 -0
  50. govio-0.2.11/skills/govio/govio-metadata/reference-networkx.md +181 -0
  51. govio-0.2.11/skills/govio/govio-metrics/SKILL.md +367 -0
  52. govio-0.2.11/skills/govio/govio-metrics/playbooks/simple-metric-query.md +193 -0
  53. govio-0.2.11/skills/govio/govio-metrics/scripts/query_example.json +17 -0
  54. govio-0.2.11/skills/govio/govio-metrics/scripts/sql_builder.py +343 -0
  55. govio-0.2.11/skills/govio-observe/SKILL.md +330 -0
  56. govio-0.2.11/skills/observe-compare-dfs/SKILL.md +137 -0
  57. govio-0.2.11/skills/observe-dataset-ops/SKILL.md +376 -0
  58. govio-0.2.11/skills/observe-explore-relations/SKILL.md +146 -0
  59. govio-0.2.11/src/govio/__init__.py +10 -0
  60. govio-0.2.11/src/govio/cli/__init__.py +4 -0
  61. govio-0.2.11/src/govio/cli/config.py +136 -0
  62. govio-0.2.11/src/govio/cli/main.py +106 -0
  63. govio-0.2.11/src/govio/cli/meta_export.py +263 -0
  64. govio-0.2.11/src/govio/cli/observe.py +241 -0
  65. govio-0.2.11/src/govio/cli/onboard.py +610 -0
  66. govio-0.2.11/src/govio/cli/query.py +129 -0
  67. govio-0.2.11/src/govio/cli/std_recommend.py +70 -0
  68. govio-0.2.11/src/govio/core/__init__.py +4 -0
  69. govio-0.2.11/src/govio/core/assets_generator.py +235 -0
  70. govio-0.2.11/src/govio/core/graph_factory.py +54 -0
  71. govio-0.2.11/src/govio/graph/__init__.py +4 -0
  72. govio-0.2.11/src/govio/graph/falkordb_graph.py +100 -0
  73. govio-0.2.11/src/govio/graph/networkx_graph.py +73 -0
  74. govio-0.2.11/src/govio/metadata/__init__.py +25 -0
  75. govio-0.2.11/src/govio/metadata/application.py +51 -0
  76. govio-0.2.11/src/govio/metadata/database.py +160 -0
  77. govio-0.2.11/src/govio/metadata/duckdb_loader.py +69 -0
  78. govio-0.2.11/src/govio/metadata/gen_networkx.py +136 -0
  79. govio-0.2.11/src/govio/metadata/metric.py +329 -0
  80. govio-0.2.11/src/govio/metadata/metric_schema.json +194 -0
  81. govio-0.2.11/src/govio/metadata/recommender.py +471 -0
  82. govio-0.2.11/src/govio/metadata/relationship.py +305 -0
  83. govio-0.2.11/src/govio/metadata/standard.py +125 -0
  84. govio-0.2.11/src/govio/metadata/utility.py +235 -0
  85. govio-0.2.11/src/govio/observe_data/__init__.py +1 -0
  86. govio-0.2.11/src/govio/observe_data/config.py +39 -0
  87. govio-0.2.11/src/govio/observe_data/core/__init__.py +1 -0
  88. govio-0.2.11/src/govio/observe_data/core/comparator.py +52 -0
  89. govio-0.2.11/src/govio/observe_data/core/database.py +64 -0
  90. govio-0.2.11/src/govio/observe_data/core/dataframe_store.py +66 -0
  91. govio-0.2.11/src/govio/observe_data/core/explorer.py +106 -0
  92. govio-0.2.11/src/govio/observe_data/core/observe_store.py +179 -0
  93. govio-0.2.11/src/govio/observe_data/core/visualizer.py +50 -0
  94. govio-0.2.11/src/govio/observe_data/tools/__init__.py +1 -0
  95. govio-0.2.11/src/govio/observe_data/tools/list_dataframes.py +29 -0
  96. govio-0.2.11/src/govio/observe_data/tools/list_datasources.py +30 -0
  97. govio-0.2.11/src/govio/observe_data/tools/load_dataframe.py +95 -0
  98. govio-0.2.11/src/govio/observe_data/tools/release_dataframe.py +44 -0
  99. govio-0.2.11/src/govio/observe_data/tools/visualize_relations.py +42 -0
  100. govio-0.2.11/start.sh +22 -0
@@ -0,0 +1,23 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+ .trae/
9
+
10
+ # Virtual environments
11
+ .venv
12
+ logs/
13
+ data/
14
+ .env
15
+ uv.lock
16
+
17
+ assets/
18
+ .iflow
19
+ .worktrees/
20
+
21
+ # observe DataFrame storage
22
+ .govio/observe/
23
+ .claude/settings.local.json
govio-0.2.11/AGENTS.md ADDED
@@ -0,0 +1,251 @@
1
+ # AGENTS.md
2
+
3
+ Guidelines for AI coding agents working in this repository.
4
+
5
+ ## Project Overview
6
+
7
+ Govio is a data governance knowledge graph library built on NetworkX. It provides metadata management, data standard recommendation, and graph database integration capabilities.
8
+
9
+ - **Language**: Python 3.13+
10
+ - **Package Manager**: uv
11
+ - **Build Backend**: hatchling
12
+
13
+ ## Build/Lint/Test Commands
14
+
15
+ ### Installation
16
+
17
+ ```bash
18
+ # Install dependencies
19
+ uv sync
20
+
21
+ # Install with dev dependencies
22
+ uv sync --group dev
23
+ ```
24
+
25
+ ### Running Tests
26
+
27
+ ```bash
28
+ # Run all tests
29
+ uv run pytest tests/
30
+
31
+ # Run a single test file
32
+ uv run pytest tests/test_relationship.py
33
+
34
+ # Run a specific test
35
+ uv run pytest tests/test_relationship.py::test_load_json_success
36
+
37
+ # Run tests with verbose output
38
+ uv run pytest tests/ -v
39
+
40
+ # Run tests with coverage
41
+ uv run pytest tests/ --cov=src/govio
42
+ ```
43
+
44
+ ### Linting and Formatting
45
+
46
+ ```bash
47
+ # Check code with ruff
48
+ uv run ruff check src/ tests/
49
+
50
+ # Format code with ruff
51
+ uv run ruff format src/ tests/
52
+
53
+ # Fix linting issues automatically
54
+ uv run ruff check --fix src/ tests/
55
+ ```
56
+
57
+ ### Build
58
+
59
+ ```bash
60
+ # Build package
61
+ uv build
62
+
63
+ # Build wheel only
64
+ uv build --wheel
65
+ ```
66
+
67
+ ### Type Checking
68
+
69
+ ```bash
70
+ # Run pyright (if available)
71
+ uv run pyright src/
72
+ ```
73
+
74
+ ## Code Style Guidelines
75
+
76
+ ### Imports
77
+
78
+ Order imports in three groups, separated by blank lines:
79
+
80
+ 1. Standard library imports (alphabetical)
81
+ 2. Third-party imports (alphabetical)
82
+ 3. Local imports (alphabetical)
83
+
84
+ ```python
85
+ import argparse
86
+ import json
87
+ from pathlib import Path
88
+ from typing import Any
89
+
90
+ import numpy as np
91
+ import pandas as pd
92
+ from sklearn.feature_extraction.text import TfidfVectorizer
93
+
94
+ from .application import AppInfoLoader
95
+ from .database import DatabaseLoader
96
+ ```
97
+
98
+ ### Type Hints
99
+
100
+ Use modern Python 3.13+ type hint syntax:
101
+
102
+ ```python
103
+ # Preferred
104
+ def load_tables(self, schema_limits: list[str] | None) -> pd.DataFrame:
105
+ ...
106
+
107
+ # Avoid (old style)
108
+ from typing import List, Optional
109
+ def load_tables(self, schema_limits: Optional[List[str]]) -> pd.DataFrame:
110
+ ...
111
+ ```
112
+
113
+ Use union types with `|` for optional parameters. Use `Any` sparingly.
114
+
115
+ ### Naming Conventions
116
+
117
+ - **Modules**: snake_case (`recommender.py`, `relationship.py`)
118
+ - **Classes**: PascalCase (`StandardRecommender`, `RelationshipLoader`)
119
+ - **Functions/Methods**: snake_case (`load_relationships`, `find_k_neighbors`)
120
+ - **Private methods**: prefix with underscore (`_preprocess_std_data`, `_validate_inputs`)
121
+ - **Constants**: UPPER_SNAKE_CASE (`DEFAULT_WEIGHTS`, `MIN_SIMILARITY`)
122
+ - **Properties**: snake_case with `@property` decorator; properties that expose a graph node type keep that type's PascalCase name (`PhysicalTable`, `Col`)
123
+
124
+ ### Docstrings
125
+
126
+ Use Chinese docstrings for Chinese-language projects. Include Args and Returns sections:
127
+
128
+ ```python
129
+ def validate_relationship(self, rel: dict[str, Any], index: int) -> bool:
130
+ """验证单个关系的有效性
131
+
132
+ Args:
133
+ rel: 关系字典
134
+ index: 关系索引(用于错误消息)
135
+
136
+ Returns:
137
+ bool: 是否有效
138
+ """
139
+ ```
140
+
141
+ ### Classes
142
+
143
+ - Use `__init__` for initialization with type-annotated parameters
144
+ - Use `@property` for computed attributes that don't require parameters
145
+ - Use factory functions for complex object creation
146
+
147
+ ```python
148
+ class DatabaseLoader:
149
+ def __init__(self, db: str, workspace_uuid: str, schema_limits: list[str] | None = None) -> None:
150
+ self.engine = create_engine(db)
151
+ self.workspace_uuid = workspace_uuid
152
+
153
+ @property
154
+ def PhysicalTable(self) -> pd.DataFrame:
155
+ return self.load_tables()
156
+ ```
157
+
158
+ ### Error Handling
159
+
160
+ - Raise descriptive exceptions with context
161
+ - Use logging module for warnings (not print statements)
162
+ - Validate inputs early in public methods
163
+
164
+ ```python
165
+ def _validate_inputs(self):
166
+ if not self.json_path.exists():
167
+ raise FileNotFoundError(f"关系文件不存在: {self.json_path}")
168
+
169
+ if self.df_tables.empty:
170
+ raise ValueError("PhysicalTable DataFrame 为空")
171
+ ```
172
+
173
+ ### Comments
174
+
175
+ - Write comments in Chinese for this codebase
176
+ - Avoid inline comments that restate the obvious
177
+ - Use module-level docstrings to explain purpose
178
+
179
+ ### File Organization
180
+
181
+ ```
182
+ src/govio/
183
+ ├── __init__.py # Public exports only
184
+ ├── graph/ # Graph database implementations
185
+ │ ├── __init__.py
186
+ │ ├── networkx_graph.py
187
+ │ └── falkordb_graph.py
188
+ └── metadata/ # Metadata loading and processing
189
+ ├── __init__.py
190
+ ├── database.py
191
+ ├── recommender.py
192
+ └── relationship.py
193
+ ```
194
+
195
+ ### Constants and Configuration
196
+
197
+ Define module-level constants at the top of the file:
198
+
199
+ ```python
200
+ DEFAULT_WEIGHTS = {
201
+ 'table': 0.20,
202
+ 'name': 0.26,
203
+ 'comment': 0.22,
204
+ 'type': 0.22,
205
+ 'numeric': 0.10
206
+ }
207
+
208
+ DEFAULT_K_NEIGHBORS = 5
209
+ MIN_SIMILARITY = 0.7
210
+ ```
211
+
212
+ ### Avoid
213
+
214
+ - Adding comments that restate code
215
+ - Using `print()` for logging (use `logging` module)
216
+ - Mutable default arguments
217
+ - Bare `except` clauses
218
+ - Star imports (`from module import *`)
219
+
220
+ ## Project-Specific Notes
221
+
222
+ ### Entry Points
223
+
224
+ The package defines CLI entry points in `pyproject.toml`:
225
+
226
+ - `metadata` - Generate metadata CSV files
227
+ - `gml_generate` - Generate GML graph files
228
+
229
+ ### Environment Variables
230
+
231
+ Load environment variables using `python-dotenv`:
232
+
233
+ ```python
234
+ from dotenv import load_dotenv
235
+ import os
236
+
237
+ load_dotenv()
238
+ db = os.getenv("KUNDB_URL", "")
239
+ ```
240
+
241
+ ### Testing
242
+
243
+ Tests use pytest with fixtures. Place fixtures at module level or in conftest.py:
244
+
245
+ ```python
246
+ @pytest.fixture
247
+ def sample_tables():
248
+ return pd.DataFrame({
249
+ "full_table_name": ["db.schema.table1"],
250
+ })
251
+ ```
govio-0.2.11/CLAUDE.md ADDED
@@ -0,0 +1,71 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Project Overview
6
+
7
+ Govio is a Python data governance knowledge graph platform. It extracts metadata from MySQL databases, builds graph structures (via FalkorDB or NetworkX), and provides data standard recommendation using collaborative filtering (k-NN). The name combines "Governance" + "IO" (data interaction/flow).
8
+
9
+ ## Commands
10
+
11
+ ```bash
12
+ # Install dependencies (uses uv package manager, Tsinghua mirror)
13
+ uv sync
14
+ uv sync --group dev # includes falkordb-bulk-loader
15
+
16
+ # Run tests (tests in tests/ are pytest-style, although the README shows the unittest runner)
17
+ uv run pytest tests/
18
+
19
+ # CLI entry points (defined in pyproject.toml [project.scripts])
20
+ metadata --kundb "mysql+pymysql://user:pass@host/db" --app-list "path.xlsx" --app-map "path.json" -o ./output
21
+ metadata --kundb "..." --app-list "..." --app-map "..." -m recommend -o ./output
22
+ metadata --kundb "..." --app-list "..." --app-map "..." --relationship rel.json -o ./output
23
+ gml_generate --csv ./output -o ./output
24
+ ```
25
+
26
+ ## Architecture
27
+
28
+ ### Source layout: `src/govio/`
29
+
30
+ **`__init__.py`** — Public API surface: exports `run`, `gml_generate`, `FalkorDBGraph`, `NetworkXGraph`.
31
+
32
+ **`graph/`** — Graph database abstractions:
33
+ - `networkx_graph.py` — In-memory graph via NetworkX GML files. Provides `schema` property (node/edge type inspection) and direct `G` access to the underlying `nx.DiGraph`.
34
+ - `falkordb_graph.py` — FalkorDB (Redis-based) graph client using Cypher queries.
35
+
36
+ **`metadata/`** — Data loading and processing pipeline:
37
+ - `database.py` — `DatabaseLoader`: extracts table/column metadata from MySQL via SQLAlchemy. Exposes `PhysicalTable` and `Col` as DataFrames. Requires `workspace_uuid` and `schema_limits`.
38
+ - `application.py` — `AppInfoLoader`: loads app metadata from Excel (openpyxl).
39
+ - `standard.py` — `StandardLoader`: loads data standards and compliance info from governance DB.
40
+ - `relationship.py` — `RelationshipLoader` / `load_relationships()`: validates table relationships from JSON (supports one_to_one, one_to_many, many_to_one, many_to_many). Returns edges DataFrame.
41
+ - `recommender.py` — `create_recommender()`: k-NN collaborative filtering for recommending data standards to non-compliant columns. Uses configurable weights (table, name, comment, type, numeric).
42
+ - `metric.py` — `MetricLoader` / `load_metrics()`: loads metric definitions from JSON (validated by `metric_schema.json`), produces Metric/Dimension node DataFrames and edge DataFrames (USES_TABLE, REFERS_COLUMN, DERIVED_FROM, DIMENSION_USED, SUPERSEDES). Validates source table references, derived_from references, and DAG property.
43
+ - `gen_networkx.py` — `build_graph()`: converts CSV node/edge files to NetworkX GML format. Reads specific CSV naming conventions (`:ID(NodeType)` columns for nodes, `:START_ID`/`:END_ID` for edges).
44
+ - `utility.py` — CLI entry point (`run()`), orchestrates the full pipeline: load metadata → generate CSVs → optionally produce GML. Also contains `data_standard_recommend()` for batch recommendation mode.
45
+
46
+ ### Graph model
47
+
48
+ Node types: `PhysicalTable`, `Col`, `Application`, `Standard`, `Metric`, `Dimension`
49
+ Edge types: `HAS_COLUMN` (table→col), `USE` (app→table), `COMPLIES_WITH` (col→standard), `RELATES_TO` (table→table), `USES_TABLE` (metric→table), `REFERS_COLUMN` (metric→col), `DERIVED_FROM` (metric→metric), `DIMENSION_USED` (metric→dimension), `SUPERSEDES` (metric→metric)
50
+
51
+ `Calculation` node type and `CALCULATED_BY`/`BASED_ON` edges are reserved for future shared calculation templates.
52
+
53
+ CSV files use FalkorDB bulk-import header conventions (`:ID(Type)`, `:START_ID(Type)`, `:END_ID(Type)`). The GML generator parses these headers to reconstruct typed graphs.
54
+
55
+ ### Data flow
56
+
57
+ 1. `metadata` CLI → DatabaseLoader + AppInfoLoader + StandardLoader → CSV files (node + edge)
58
+ 2. `metadata --relationship` appends RELATES_TO.csv
59
+ 3. `metadata` with `--metric` (via onboard) appends Metric.csv, Dimension.csv, and metric edge CSVs
60
+ 4. `metadata -m recommend` generates COMPLIES_WITH.csv via recommender
61
+ 5. `gml_generate` → CSV files → NetworkX GML graph
62
+ 6. `NetworkXGraph` loads GML for query/inspection
63
+
64
+ ### Key conventions
65
+
66
+ - Python 3.13+, uses modern type hints (`X | None` syntax, not `Optional[X]`)
67
+ - All metadata loaders return pandas DataFrames
68
+ - Node identities use dotted format: `db.schema.table.column`
69
+ - `workspace_uuid` is hardcoded in utility.py but parameterized in loader classes
70
+ - Tests use pytest (despite README mentioning unittest) — run with `uv run pytest`
71
+ - Chinese language is used in comments, print statements, and documentation
govio-0.2.11/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 chenxinma
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.