rdfsolve 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdfsolve-0.0.1/LICENSE +21 -0
- rdfsolve-0.0.1/MANIFEST.in +13 -0
- rdfsolve-0.0.1/Makefile +439 -0
- rdfsolve-0.0.1/PKG-INFO +307 -0
- rdfsolve-0.0.1/README.md +212 -0
- rdfsolve-0.0.1/data/sources.csv +87 -0
- rdfsolve-0.0.1/pyproject.toml +309 -0
- rdfsolve-0.0.1/scripts/generate_results_json.sh +198 -0
- rdfsolve-0.0.1/setup.cfg +4 -0
- rdfsolve-0.0.1/src/rdfsolve/__init__.py +26 -0
- rdfsolve-0.0.1/src/rdfsolve/__main__.py +12 -0
- rdfsolve-0.0.1/src/rdfsolve/api.py +515 -0
- rdfsolve-0.0.1/src/rdfsolve/cli.py +657 -0
- rdfsolve-0.0.1/src/rdfsolve/models.py +138 -0
- rdfsolve-0.0.1/src/rdfsolve/parser.py +4126 -0
- rdfsolve-0.0.1/src/rdfsolve/py.typed +1 -0
- rdfsolve-0.0.1/src/rdfsolve/schema_utils.py +326 -0
- rdfsolve-0.0.1/src/rdfsolve/sparql_helper.py +846 -0
- rdfsolve-0.0.1/src/rdfsolve/tools/utils.py +49 -0
- rdfsolve-0.0.1/src/rdfsolve/utils.py +159 -0
- rdfsolve-0.0.1/src/rdfsolve/version.py +42 -0
- rdfsolve-0.0.1/src/rdfsolve.egg-info/PKG-INFO +307 -0
- rdfsolve-0.0.1/src/rdfsolve.egg-info/SOURCES.txt +39 -0
- rdfsolve-0.0.1/src/rdfsolve.egg-info/dependency_links.txt +1 -0
- rdfsolve-0.0.1/src/rdfsolve.egg-info/entry_points.txt +2 -0
- rdfsolve-0.0.1/src/rdfsolve.egg-info/requires.txt +45 -0
- rdfsolve-0.0.1/src/rdfsolve.egg-info/top_level.txt +1 -0
- rdfsolve-0.0.1/tests/__init__.py +1 -0
- rdfsolve-0.0.1/tests/test_bioregistry_prefixes.py +214 -0
- rdfsolve-0.0.1/tests/test_data/aopwikirdf_generated_void.ttl +608 -0
- rdfsolve-0.0.1/tests/test_data/aopwikirdf_linkml_schema.yaml +643 -0
- rdfsolve-0.0.1/tests/test_data/aopwikirdf_pattern_coverage.csv +97 -0
- rdfsolve-0.0.1/tests/test_data/aopwikirdf_schema.csv +97 -0
- rdfsolve-0.0.1/tests/test_data/aopwikirdf_schema.json +1687 -0
- rdfsolve-0.0.1/tests/test_data/aopwikirdf_schema.jsonld +412 -0
- rdfsolve-0.0.1/tests/test_data/cache/aopwikirdf_frequencies_basic.pkl +0 -0
- rdfsolve-0.0.1/tests/test_data/cache/aopwikirdf_frequencies_with_instances.pkl +0 -0
- rdfsolve-0.0.1/tests/test_data/cache/aopwikirdf_jsonld_schema.pkl +0 -0
- rdfsolve-0.0.1/tests/test_data/cache/aopwikirdf_voidparser.pkl +0 -0
- rdfsolve-0.0.1/tests/test_frequency_count.py +151 -0
- rdfsolve-0.0.1/tests/test_parser.py +301 -0
rdfsolve-0.0.1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Javier Millán Acosta
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
graft src
|
|
2
|
+
graft tests
|
|
3
|
+
prune scripts
|
|
4
|
+
prune notebooks
|
|
5
|
+
prune tests/.pytest_cache
|
|
6
|
+
prune docs
|
|
7
|
+
|
|
8
|
+
global-exclude *.py[cod] __pycache__ *.so *.dylib .DS_Store *.gpickle .idea/**
|
|
9
|
+
|
|
10
|
+
include README.md LICENSE Makefile
|
|
11
|
+
recursive-include data *.csv
|
|
12
|
+
recursive-include scripts *.sh
|
|
13
|
+
exclude tox.ini .readthedocs.yml .cruft.json CITATION.cff docker-compose.yml Dockerfile noxfile.py
|
rdfsolve-0.0.1/Makefile
ADDED
|
@@ -0,0 +1,439 @@
|
|
|
1
|
+
# Makefile for rdfsolve notebook generation
# Can be used both locally and in GitHub Actions

# --- User-tunable knobs (override via environment or command line) ---
DATASET ?=
NOTEBOOK_TYPE ?= all
VENV_DIR ?= .venv
# Absolute paths to the venv tools so recipes that `cd` still find them.
# $(CURDIR) is computed by Make itself; the previous `$(shell pwd)` under a
# recursive `?=` forked a shell on every single expansion of these variables.
PYTHON ?= $(CURDIR)/$(VENV_DIR)/bin/python
PIP ?= $(CURDIR)/$(VENV_DIR)/bin/pip
JUPYTER ?= $(CURDIR)/$(VENV_DIR)/bin/jupyter
MAX_PARALLEL ?= 50
# CI output sinks; default to /dev/null so local runs work unchanged.
GITHUB_OUTPUT ?= /dev/null
GITHUB_STEP_SUMMARY ?= /dev/null

# --- Directories ---
NOTEBOOKS_DIR := notebooks
DOCS_DIR := docs
DOCS_NOTEBOOKS_DIR := $(DOCS_DIR)/notebooks
DOCS_DATA_DIR := $(DOCS_DIR)/data/schema_extraction
ARTIFACTS_DIR := artifacts

SCHEMA_NB_DIR := $(NOTEBOOKS_DIR)/01_schema_extraction
PYDANTIC_NB_DIR := $(NOTEBOOKS_DIR)/02_pydantic_models
NAMESPACE_NB_DIR := $(NOTEBOOKS_DIR)/03_bioregistry_namespaces

SCHEMA_DOCS_DIR := $(DOCS_NOTEBOOKS_DIR)/01_schema_extraction
PYDANTIC_DOCS_DIR := $(DOCS_NOTEBOOKS_DIR)/02_pydantic_models
NAMESPACE_DOCS_DIR := $(DOCS_NOTEBOOKS_DIR)/03_bioregistry_namespaces

# --- ANSI colors for terminal output ---
COLOR_RESET := \033[0m
COLOR_BOLD := \033[1m
COLOR_GREEN := \033[32m
COLOR_YELLOW := \033[33m
COLOR_BLUE := \033[34m
.PHONY: help
help: ## Show this help message
	@echo "$(COLOR_BOLD)RDFSolve Notebook Generation Makefile$(COLOR_RESET)"
	@echo ""
	@echo "$(COLOR_BLUE)Usage:$(COLOR_RESET)"
	@echo " make <target> [DATASET=<name>] [NOTEBOOK_TYPE=<type>]"
	@echo ""
	@echo "$(COLOR_BLUE)Targets:$(COLOR_RESET)"
	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf " $(COLOR_GREEN)%-30s$(COLOR_RESET) %s\n", $$1, $$2}'
	@echo ""
	@echo "$(COLOR_BLUE)Examples:$(COLOR_RESET)"
	@echo " make all # Generate all notebooks for all datasets"
	@echo " make schema DATASET=aopwikirdf # Generate schema notebook for one dataset"
	@echo " make clean # Clean generated files"
	@echo " make install-deps # Install Python dependencies"

.PHONY: all
all: install-deps setup ## Run complete notebook generation pipeline for DATASET or all datasets
	@if [ -n "$(DATASET)" ]; then \
	  echo "$(COLOR_BLUE)Running pipeline for single dataset: $(DATASET)$(COLOR_RESET)"; \
	  $(MAKE) schema DATASET=$(DATASET); \
	  $(MAKE) pydantic DATASET=$(DATASET); \
	  $(MAKE) namespace DATASET=$(DATASET); \
	else \
	  echo "$(COLOR_BLUE)Running pipeline for all datasets$(COLOR_RESET)"; \
	  $(MAKE) schema-all; \
	  $(MAKE) pydantic-all; \
	  $(MAKE) namespace-all; \
	fi
	@$(MAKE) collect

.PHONY: setup
setup: ## Create necessary directories
	@echo "$(COLOR_BLUE)Creating output directories...$(COLOR_RESET)"
	@mkdir -p $(SCHEMA_DOCS_DIR) $(PYDANTIC_DOCS_DIR) $(NAMESPACE_DOCS_DIR) \
	  $(DOCS_DATA_DIR) $(SCHEMA_NB_DIR) $(PYDANTIC_NB_DIR) $(NAMESPACE_NB_DIR)
	@echo "$(COLOR_GREEN)✓ Directories created$(COLOR_RESET)"
.PHONY: venv
venv: ## Create virtual environment with uv (Python 3.10+)
	@if [ -d "$(VENV_DIR)" ]; then \
	  echo "$(COLOR_GREEN)✓ Virtual environment already exists at $(VENV_DIR)$(COLOR_RESET)"; \
	else \
	  echo "$(COLOR_BLUE)Creating virtual environment with uv...$(COLOR_RESET)"; \
	  if command -v uv >/dev/null 2>&1; then \
	    uv venv $(VENV_DIR) --python 3.10; \
	    echo "$(COLOR_GREEN)✓ Virtual environment created at $(VENV_DIR)$(COLOR_RESET)"; \
	  else \
	    echo "$(COLOR_YELLOW)Error: uv not found. Please install it first:$(COLOR_RESET)"; \
	    echo " curl -LsSf https://astral.sh/uv/install.sh | sh"; \
	    exit 1; \
	  fi; \
	fi

.PHONY: check-venv
check-venv: venv ## Ensure virtual environment exists and check Python version
	@if [ ! -f "$(PYTHON)" ]; then \
	  echo "$(COLOR_YELLOW)Virtual environment Python not found, recreating...$(COLOR_RESET)"; \
	  rm -rf $(VENV_DIR); \
	  $(MAKE) venv; \
	fi
	@# Compare (major, minor) tuples in Python itself. The previous bc-based
	@# float comparison broke for two-digit minors: bc parses "3.9 >= 3.10"
	@# as 3.9 >= 3.1 and wrongly accepts Python 3.9 — and it required bc.
	@python_ok=$$($(PYTHON) -c 'import sys; print(1 if sys.version_info[:2] >= (3, 10) else 0)' 2>/dev/null || echo 0); \
	python_version=$$($(PYTHON) -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")' 2>/dev/null || echo "0.0"); \
	if [ "$$python_ok" -ne 1 ]; then \
	  echo "$(COLOR_YELLOW)Error: Python $$python_version found in venv, but >= 3.10 required$(COLOR_RESET)"; \
	  echo "Recreating virtual environment..."; \
	  rm -rf $(VENV_DIR); \
	  $(MAKE) venv; \
	else \
	  echo "$(COLOR_GREEN)✓ Using Python $$python_version from $(VENV_DIR)$(COLOR_RESET)"; \
	fi
.PHONY: install-deps
install-deps: check-venv ## Install Python dependencies with uv
	@echo "$(COLOR_BLUE)Installing Python dependencies with uv...$(COLOR_RESET)"
	@# Pin the target interpreter explicitly: without --python, uv installs
	@# into whichever environment it discovers (active venv or ./.venv),
	@# silently ignoring a VENV_DIR override.
	@if command -v uv >/dev/null 2>&1; then \
	  uv pip install --python "$(PYTHON)" -e .[notebooks]; \
	  uv pip install --python "$(PYTHON)" jupyter nbconvert pandas; \
	else \
	  echo "$(COLOR_YELLOW)uv not found, using pip...$(COLOR_RESET)"; \
	  $(PYTHON) -m pip install --upgrade pip; \
	  $(PIP) install -e .[notebooks]; \
	  $(PIP) install jupyter nbconvert pandas; \
	fi
	@echo "$(COLOR_GREEN)✓ Dependencies installed$(COLOR_RESET)"

.PHONY: install-system-deps
install-system-deps: ## Install system dependencies (requires sudo)
	@echo "$(COLOR_BLUE)Installing system dependencies...$(COLOR_RESET)"
	sudo apt-get update
	sudo apt-get install -y pandoc
	@echo "$(COLOR_GREEN)✓ System dependencies installed$(COLOR_RESET)"

.PHONY: list-datasets
list-datasets: ## List all available datasets from sources.csv
	@echo "$(COLOR_BLUE)Available datasets:$(COLOR_RESET)"
	@tail -n +2 data/sources.csv | cut -d',' -f1 | grep -v '^$$' | sort
.PHONY: prepare-matrix
prepare-matrix: ## Generate dataset matrix (for CI)
	@echo "$(COLOR_BLUE)Generating dataset matrix...$(COLOR_RESET)"
	@# Build a compact JSON array of dataset names for GitHub Actions matrices.
	@datasets=$$(tail -n +2 data/sources.csv | cut -d',' -f1 | grep -v '^$$' | jq -R . | jq -s . | jq -c .); \
	echo "schema-matrix={\"dataset\":$$datasets}" >> $(GITHUB_OUTPUT); \
	echo "pydantic-matrix={\"dataset\":$$datasets}" >> $(GITHUB_OUTPUT); \
	echo "namespace-matrix={\"dataset\":$$datasets}" >> $(GITHUB_OUTPUT); \
	dataset_count=$$(echo $$datasets | jq length); \
	echo "Generated matrix with $$dataset_count datasets"; \
	echo "Notebook type requested: $(NOTEBOOK_TYPE)"

.PHONY: generate-schema-nb
generate-schema-nb: ## Generate schema notebook for DATASET
	@if [ -z "$(DATASET)" ]; then \
	  echo "$(COLOR_YELLOW)Error: DATASET not specified$(COLOR_RESET)"; \
	  echo "Usage: make generate-schema-nb DATASET=<name>"; \
	  exit 1; \
	fi
	@echo "$(COLOR_BLUE)Generating schema notebook for dataset: $(DATASET)$(COLOR_RESET)"
	cd $(NOTEBOOKS_DIR) && $(PYTHON) make_notebooks.py --dataset "$(DATASET)" --type schema
	@echo "$(COLOR_GREEN)✓ Schema notebook generated for $(DATASET)$(COLOR_RESET)"
.PHONY: execute-schema-nb
execute-schema-nb: setup ## Execute and convert schema notebook to HTML
	@if [ -z "$(DATASET)" ]; then \
	  echo "$(COLOR_YELLOW)Error: DATASET not specified$(COLOR_RESET)"; \
	  exit 1; \
	fi
	@echo "$(COLOR_BLUE)Executing schema notebook for $(DATASET)...$(COLOR_RESET)"
	@# A conversion failure intentionally does not fail the target: an error
	@# placeholder page is written instead so the docs site stays complete.
	@cd $(SCHEMA_NB_DIR) && \
	notebook="$(DATASET)_schema.ipynb" && \
	if $(JUPYTER) nbconvert \
	  --execute \
	  --to html \
	  "$$notebook" \
	  --output-dir ../../$(SCHEMA_DOCS_DIR) \
	  --ExecutePreprocessor.kernel_name=python3 ; then \
	  echo "SUCCESS=true" >> $(GITHUB_OUTPUT); \
	  echo "$(COLOR_GREEN)✓ Successfully converted: $$notebook$(COLOR_RESET)"; \
	  echo "**$(DATASET)**: Schema analysis completed successfully" >> $(GITHUB_STEP_SUMMARY); \
	else \
	  echo "SUCCESS=false" >> $(GITHUB_OUTPUT); \
	  echo "$(COLOR_YELLOW)⚠ Failed to convert: $$notebook$(COLOR_RESET)"; \
	  echo "**$(DATASET)**: Schema analysis failed (timeout or error)" >> $(GITHUB_STEP_SUMMARY); \
	  $(MAKE) create-schema-error-report; \
	fi
.PHONY: create-schema-error-report
create-schema-error-report: ## Create error report HTML for failed schema notebook
	@echo "$(COLOR_YELLOW)Creating error report for $(DATASET)...$(COLOR_RESET)"
	@# Each recipe line runs in its own shell, so a multi-line `cat << 'EOF'`
	@# heredoc cannot work here: the body lines would be executed as shell
	@# commands and the report would never be written. Emit the page with one
	@# grouped command instead. $(DATASET) is expanded by Make, so the single
	@# quotes below do not block substitution.
	@{ \
	  echo '<!DOCTYPE html>'; \
	  echo '<html>'; \
	  echo '<head>'; \
	  echo '<title>Schema Analysis Failed - $(DATASET)</title>'; \
	  echo '</head>'; \
	  echo '<body>'; \
	  echo '<h1>Schema Analysis Failed</h1>'; \
	  echo '<h2>Dataset Information</h2>'; \
	  echo '<p><strong>Dataset:</strong> $(DATASET)</p>'; \
	  echo '<p><strong>Attempted:</strong> <script>document.write(new Date().toUTCString())</script></p>'; \
	  echo '<h2>Manual Execution</h2>'; \
	  echo '<pre>cd notebooks/01_schema_extraction'; \
	  echo 'python make_notebooks.py --dataset $(DATASET) --type schema'; \
	  echo 'jupyter nbconvert --execute --to html $(DATASET)_schema.ipynb</pre>'; \
	  echo '</body>'; \
	  echo '</html>'; \
	} > "$(SCHEMA_DOCS_DIR)/$(DATASET)_schema.html"

.PHONY: schema
schema: install-deps setup generate-schema-nb execute-schema-nb ## Generate and execute schema notebook for DATASET
.PHONY: schema-all
schema-all: ## Generate and execute schema notebooks for all datasets
	@echo "$(COLOR_BLUE)Processing all datasets for schema notebooks...$(COLOR_RESET)"
	@# Keep going on per-dataset failures; each one is reported but not fatal.
	@for dataset in $$(tail -n +2 data/sources.csv | cut -d',' -f1 | grep -v '^$$'); do \
	  echo "$(COLOR_BLUE)Processing: $$dataset$(COLOR_RESET)"; \
	  $(MAKE) schema DATASET=$$dataset || echo "$(COLOR_YELLOW)⚠ Failed: $$dataset$(COLOR_RESET)"; \
	done

.PHONY: generate-pydantic-nb
generate-pydantic-nb: ## Generate pydantic notebook for DATASET
	@if [ -z "$(DATASET)" ]; then \
	  echo "$(COLOR_YELLOW)Error: DATASET not specified$(COLOR_RESET)"; \
	  exit 1; \
	fi
	@echo "$(COLOR_BLUE)Generating pydantic notebook for dataset: $(DATASET)$(COLOR_RESET)"
	cd $(NOTEBOOKS_DIR) && $(PYTHON) make_notebooks.py --dataset "$(DATASET)" --type pydantic
	@echo "$(COLOR_GREEN)✓ Pydantic notebook generated for $(DATASET)$(COLOR_RESET)"
.PHONY: execute-pydantic-nb
execute-pydantic-nb: setup ## Execute and convert pydantic notebook to HTML
	@if [ -z "$(DATASET)" ]; then \
	  echo "$(COLOR_YELLOW)Error: DATASET not specified$(COLOR_RESET)"; \
	  exit 1; \
	fi
	@echo "$(COLOR_BLUE)Executing pydantic notebook for $(DATASET)...$(COLOR_RESET)"
	@# A conversion failure intentionally does not fail the target: an error
	@# placeholder page is written instead so the docs site stays complete.
	@cd $(PYDANTIC_NB_DIR) && \
	notebook="$(DATASET)_pydantic.ipynb" && \
	if $(JUPYTER) nbconvert \
	  --execute \
	  --to html \
	  "$$notebook" \
	  --output-dir ../../$(PYDANTIC_DOCS_DIR) \
	  --ExecutePreprocessor.kernel_name=python3 ; then \
	  echo "SUCCESS=true" >> $(GITHUB_OUTPUT); \
	  echo "$(COLOR_GREEN)✓ Successfully converted: $$notebook$(COLOR_RESET)"; \
	  echo "**$(DATASET)**: Pydantic model generation completed successfully" >> $(GITHUB_STEP_SUMMARY); \
	else \
	  echo "SUCCESS=false" >> $(GITHUB_OUTPUT); \
	  echo "$(COLOR_YELLOW)⚠ Failed to convert: $$notebook$(COLOR_RESET)"; \
	  echo "**$(DATASET)**: Pydantic model generation failed (timeout or error)" >> $(GITHUB_STEP_SUMMARY); \
	  $(MAKE) create-pydantic-error-report; \
	fi
.PHONY: create-pydantic-error-report
create-pydantic-error-report: ## Create error report HTML for failed pydantic notebook
	@echo "$(COLOR_YELLOW)Creating error report for $(DATASET)...$(COLOR_RESET)"
	@# Each recipe line runs in its own shell, so a multi-line `cat << 'EOF'`
	@# heredoc cannot work here: the body lines would be executed as shell
	@# commands and the report would never be written. Emit the page with one
	@# grouped command instead.
	@{ \
	  echo '<!DOCTYPE html>'; \
	  echo '<html>'; \
	  echo '<head>'; \
	  echo '<title>Pydantic Generation Failed - $(DATASET)</title>'; \
	  echo '</head>'; \
	  echo '<body>'; \
	  echo '<h1>Pydantic Generation Failed</h1>'; \
	  echo '<h2>Dataset Information</h2>'; \
	  echo '<p><strong>Dataset:</strong> $(DATASET)</p>'; \
	  echo '<p><strong>Attempted:</strong> <script>document.write(new Date().toUTCString())</script></p>'; \
	  echo '<h2>Manual Execution</h2>'; \
	  echo '<pre>cd notebooks'; \
	  echo 'python make_notebooks.py --dataset $(DATASET) --type pydantic'; \
	  echo 'cd 02_pydantic_models'; \
	  echo 'jupyter nbconvert --execute --to html $(DATASET)_pydantic.ipynb</pre>'; \
	  echo '</body>'; \
	  echo '</html>'; \
	} > "$(PYDANTIC_DOCS_DIR)/$(DATASET)_pydantic.html"

.PHONY: pydantic
# install-deps added for consistency with the `schema` target, so a
# standalone `make pydantic DATASET=x` works on a fresh checkout (idempotent
# when deps are already installed).
pydantic: install-deps generate-pydantic-nb execute-pydantic-nb ## Generate and execute pydantic notebook for DATASET
.PHONY: pydantic-all
pydantic-all: ## Generate and execute pydantic notebooks for all datasets
	@echo "$(COLOR_BLUE)Processing all datasets for pydantic notebooks...$(COLOR_RESET)"
	@# Keep going on per-dataset failures; each one is reported but not fatal.
	@for dataset in $$(tail -n +2 data/sources.csv | cut -d',' -f1 | grep -v '^$$'); do \
	  echo "$(COLOR_BLUE)Processing: $$dataset$(COLOR_RESET)"; \
	  $(MAKE) pydantic DATASET=$$dataset || echo "$(COLOR_YELLOW)⚠ Failed: $$dataset$(COLOR_RESET)"; \
	done

.PHONY: generate-namespace-nb
generate-namespace-nb: ## Generate namespace notebook for DATASET
	@if [ -z "$(DATASET)" ]; then \
	  echo "$(COLOR_YELLOW)Error: DATASET not specified$(COLOR_RESET)"; \
	  exit 1; \
	fi
	@echo "$(COLOR_BLUE)Generating namespace notebook for dataset: $(DATASET)$(COLOR_RESET)"
	cd $(NOTEBOOKS_DIR) && $(PYTHON) make_notebooks.py --dataset "$(DATASET)" --type namespace
	@echo "$(COLOR_GREEN)✓ Namespace notebook generated for $(DATASET)$(COLOR_RESET)"
.PHONY: execute-namespace-nb
execute-namespace-nb: setup ## Execute and convert namespace notebook to HTML
	@if [ -z "$(DATASET)" ]; then \
	  echo "$(COLOR_YELLOW)Error: DATASET not specified$(COLOR_RESET)"; \
	  exit 1; \
	fi
	@echo "$(COLOR_BLUE)Executing namespace notebook for $(DATASET)...$(COLOR_RESET)"
	@# A conversion failure intentionally does not fail the target: an error
	@# placeholder page is written instead so the docs site stays complete.
	@cd $(NAMESPACE_NB_DIR) && \
	notebook="$(DATASET)_namespaces.ipynb" && \
	if $(JUPYTER) nbconvert \
	  --execute \
	  --to html \
	  "$$notebook" \
	  --output-dir ../../$(NAMESPACE_DOCS_DIR) \
	  --ExecutePreprocessor.kernel_name=python3 ; then \
	  echo "SUCCESS=true" >> $(GITHUB_OUTPUT); \
	  echo "$(COLOR_GREEN)✓ Successfully converted: $$notebook$(COLOR_RESET)"; \
	  echo "**$(DATASET)**: Namespace discovery completed successfully" >> $(GITHUB_STEP_SUMMARY); \
	else \
	  echo "SUCCESS=false" >> $(GITHUB_OUTPUT); \
	  echo "$(COLOR_YELLOW)⚠ Failed to convert: $$notebook$(COLOR_RESET)"; \
	  echo "**$(DATASET)**: Namespace discovery failed (timeout or error)" >> $(GITHUB_STEP_SUMMARY); \
	  $(MAKE) create-namespace-error-report; \
	fi
.PHONY: create-namespace-error-report
create-namespace-error-report: ## Create error report HTML for failed namespace notebook
	@echo "$(COLOR_YELLOW)Creating error report for $(DATASET)...$(COLOR_RESET)"
	@# Each recipe line runs in its own shell, so a multi-line `cat << 'EOF'`
	@# heredoc cannot work here: the body lines would be executed as shell
	@# commands and the report would never be written. Emit the page with one
	@# grouped command instead.
	@{ \
	  echo '<!DOCTYPE html>'; \
	  echo '<html>'; \
	  echo '<head>'; \
	  echo '<title>Namespace Discovery Failed - $(DATASET)</title>'; \
	  echo '</head>'; \
	  echo '<body>'; \
	  echo '<h1>Namespace Discovery Failed</h1>'; \
	  echo '<h2>Dataset Information</h2>'; \
	  echo '<p><strong>Dataset:</strong> $(DATASET)</p>'; \
	  echo '<p><strong>Attempted:</strong> <script>document.write(new Date().toUTCString())</script></p>'; \
	  echo '<h2>Manual Execution</h2>'; \
	  echo '<pre>cd notebooks'; \
	  echo 'python make_notebooks.py --dataset $(DATASET) --type namespace'; \
	  echo 'cd 03_bioregistry_namespaces'; \
	  echo 'jupyter nbconvert --execute --to html $(DATASET)_namespaces.ipynb</pre>'; \
	  echo '</body>'; \
	  echo '</html>'; \
	} > "$(NAMESPACE_DOCS_DIR)/$(DATASET)_namespaces.html"

.PHONY: namespace
# install-deps added for consistency with the `schema` target, so a
# standalone `make namespace DATASET=x` works on a fresh checkout (idempotent
# when deps are already installed).
namespace: install-deps generate-namespace-nb execute-namespace-nb ## Generate and execute namespace notebook for DATASET
.PHONY: namespace-all
namespace-all: ## Generate and execute namespace notebooks for all datasets
	@echo "$(COLOR_BLUE)Processing all datasets for namespace notebooks...$(COLOR_RESET)"
	@# Keep going on per-dataset failures; each one is reported but not fatal.
	@for dataset in $$(tail -n +2 data/sources.csv | cut -d',' -f1 | grep -v '^$$'); do \
	  echo "$(COLOR_BLUE)Processing: $$dataset$(COLOR_RESET)"; \
	  $(MAKE) namespace DATASET=$$dataset || echo "$(COLOR_YELLOW)⚠ Failed: $$dataset$(COLOR_RESET)"; \
	done

.PHONY: collect
collect: ## Collect and organize all generated results
	@echo "$(COLOR_BLUE)Collecting and organizing results...$(COLOR_RESET)"
	@$(MAKE) setup
	@# Artifacts only exist after CI matrix jobs; local runs skip this step.
	@if [ -d "$(ARTIFACTS_DIR)" ] && [ -n "$$(ls -A $(ARTIFACTS_DIR)/ 2>/dev/null)" ]; then \
	  echo "$(COLOR_BLUE)Collecting artifacts from matrix jobs...$(COLOR_RESET)"; \
	  $(MAKE) collect-artifacts; \
	else \
	  echo "$(COLOR_YELLOW)No artifacts directory found, skipping artifact collection$(COLOR_RESET)"; \
	fi
	@$(MAKE) count-results
	@$(MAKE) generate-results-json
	@echo "$(COLOR_GREEN)✓ Results collected$(COLOR_RESET)"
.PHONY: collect-artifacts
collect-artifacts: ## Collect artifacts from CI runs
	@echo "$(COLOR_BLUE)Copying artifacts...$(COLOR_RESET)"
	@# Copy notebooks and rendered HTML per notebook type; best-effort
	@# (`|| true`) because some artifact types may be absent.
	@find $(ARTIFACTS_DIR)/ -name "*_schema.ipynb" -exec cp {} $(SCHEMA_NB_DIR)/ \; 2>/dev/null || true
	@find $(ARTIFACTS_DIR)/ -name "*_schema.html" -exec cp {} $(SCHEMA_DOCS_DIR)/ \; 2>/dev/null || true
	@find $(ARTIFACTS_DIR)/ -name "*_pydantic.ipynb" -exec cp {} $(PYDANTIC_NB_DIR)/ \; 2>/dev/null || true
	@find $(ARTIFACTS_DIR)/ -name "*_pydantic.html" -exec cp {} $(PYDANTIC_DOCS_DIR)/ \; 2>/dev/null || true
	@find $(ARTIFACTS_DIR)/ -name "*_namespaces.ipynb" -exec cp {} $(NAMESPACE_NB_DIR)/ \; 2>/dev/null || true
	@find $(ARTIFACTS_DIR)/ -name "*_namespaces.html" -exec cp {} $(NAMESPACE_DOCS_DIR)/ \; 2>/dev/null || true
	@# Re-root each */data/schema_extraction/<dataset>/<file> under DOCS_DATA_DIR.
	@find $(ARTIFACTS_DIR)/ -path "*/data/schema_extraction/*" -type f \
	  \( -name "*.jsonld" -o -name "*.yaml" -o -name "*.csv" -o -name "*.ttl" -o -name "*.nq" -o -name "*.parquet" -o -name "*.json" -o -name "*.jsonl" \) \
	  -exec bash -c 'file="$$1"; relative_path="$${file#*/data/schema_extraction/}"; dataset_name=$$(echo "$$relative_path" | cut -d/ -f1); filename=$$(basename "$$relative_path"); mkdir -p "$(DOCS_DATA_DIR)/$$dataset_name"; cp "$$file" "$(DOCS_DATA_DIR)/$$dataset_name/$$filename"' _ {} \; 2>/dev/null || true
.PHONY: count-results
count-results: ## Count generated files
	@echo "$(COLOR_BLUE)Final collection results:$(COLOR_RESET)"
	@schema_nb=$$(find $(SCHEMA_NB_DIR)/ -name "*_schema.ipynb" 2>/dev/null | wc -l); \
	schema_html=$$(find $(SCHEMA_DOCS_DIR)/ -name "*_schema.html" 2>/dev/null | wc -l); \
	pydantic_nb=$$(find $(PYDANTIC_NB_DIR)/ -name "*_pydantic.ipynb" 2>/dev/null | wc -l); \
	pydantic_html=$$(find $(PYDANTIC_DOCS_DIR)/ -name "*_pydantic.html" 2>/dev/null | wc -l); \
	namespace_nb=$$(find $(NAMESPACE_NB_DIR)/ -name "*_namespaces.ipynb" 2>/dev/null | wc -l); \
	namespace_html=$$(find $(NAMESPACE_DOCS_DIR)/ -name "*_namespaces.html" 2>/dev/null | wc -l); \
	data_files=$$(find $(DOCS_DATA_DIR)/ -type f 2>/dev/null | wc -l); \
	echo " Schema notebooks: $$schema_nb"; \
	echo " Schema HTML files: $$schema_html"; \
	echo " Pydantic notebooks: $$pydantic_nb"; \
	echo " Pydantic HTML files: $$pydantic_html"; \
	echo " Namespace notebooks: $$namespace_nb"; \
	echo " Namespace HTML files: $$namespace_html"; \
	echo " Data files: $$data_files"

.PHONY: generate-results-json
generate-results-json: ## Generate results.json for web interface
	@echo "$(COLOR_BLUE)Generating results.json...$(COLOR_RESET)"
	@bash scripts/generate_results_json.sh
.PHONY: clean
clean: ## Remove generated notebooks and HTML files
	@echo "$(COLOR_YELLOW)Cleaning generated files...$(COLOR_RESET)"
	@rm -f $(SCHEMA_NB_DIR)/*_schema.ipynb
	@rm -f $(SCHEMA_DOCS_DIR)/*_schema.html
	@rm -f $(PYDANTIC_NB_DIR)/*_pydantic.ipynb
	@rm -f $(PYDANTIC_DOCS_DIR)/*_pydantic.html
	@rm -f $(NAMESPACE_NB_DIR)/*_namespaces.ipynb
	@rm -f $(NAMESPACE_DOCS_DIR)/*_namespaces.html
	@rm -f $(DOCS_DIR)/results.json
	@echo "$(COLOR_GREEN)✓ Cleaned$(COLOR_RESET)"

.PHONY: clean-all
clean-all: clean ## Remove all generated files including data
	@echo "$(COLOR_YELLOW)Cleaning all generated files including data...$(COLOR_RESET)"
	@rm -rf $(DOCS_DATA_DIR)/*
	@rm -rf $(ARTIFACTS_DIR)
	@echo "$(COLOR_GREEN)✓ All cleaned$(COLOR_RESET)"

.PHONY: test-one
test-one: ## Quick test with one dataset (usage: make test-one DATASET=aopwikirdf)
	@# With no DATASET given, fall back to the first entry in sources.csv.
	@if [ -z "$(DATASET)" ]; then \
	  DATASET=$$(tail -n +2 data/sources.csv | cut -d',' -f1 | grep -v '^$$' | head -1); \
	  echo "$(COLOR_BLUE)No DATASET specified, using first dataset: $$DATASET$(COLOR_RESET)"; \
	  $(MAKE) schema DATASET=$$DATASET; \
	else \
	  $(MAKE) schema DATASET=$(DATASET); \
	fi