rdfsolve 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. rdfsolve-0.0.1/LICENSE +21 -0
  2. rdfsolve-0.0.1/MANIFEST.in +13 -0
  3. rdfsolve-0.0.1/Makefile +439 -0
  4. rdfsolve-0.0.1/PKG-INFO +307 -0
  5. rdfsolve-0.0.1/README.md +212 -0
  6. rdfsolve-0.0.1/data/sources.csv +87 -0
  7. rdfsolve-0.0.1/pyproject.toml +309 -0
  8. rdfsolve-0.0.1/scripts/generate_results_json.sh +198 -0
  9. rdfsolve-0.0.1/setup.cfg +4 -0
  10. rdfsolve-0.0.1/src/rdfsolve/__init__.py +26 -0
  11. rdfsolve-0.0.1/src/rdfsolve/__main__.py +12 -0
  12. rdfsolve-0.0.1/src/rdfsolve/api.py +515 -0
  13. rdfsolve-0.0.1/src/rdfsolve/cli.py +657 -0
  14. rdfsolve-0.0.1/src/rdfsolve/models.py +138 -0
  15. rdfsolve-0.0.1/src/rdfsolve/parser.py +4126 -0
  16. rdfsolve-0.0.1/src/rdfsolve/py.typed +1 -0
  17. rdfsolve-0.0.1/src/rdfsolve/schema_utils.py +326 -0
  18. rdfsolve-0.0.1/src/rdfsolve/sparql_helper.py +846 -0
  19. rdfsolve-0.0.1/src/rdfsolve/tools/utils.py +49 -0
  20. rdfsolve-0.0.1/src/rdfsolve/utils.py +159 -0
  21. rdfsolve-0.0.1/src/rdfsolve/version.py +42 -0
  22. rdfsolve-0.0.1/src/rdfsolve.egg-info/PKG-INFO +307 -0
  23. rdfsolve-0.0.1/src/rdfsolve.egg-info/SOURCES.txt +39 -0
  24. rdfsolve-0.0.1/src/rdfsolve.egg-info/dependency_links.txt +1 -0
  25. rdfsolve-0.0.1/src/rdfsolve.egg-info/entry_points.txt +2 -0
  26. rdfsolve-0.0.1/src/rdfsolve.egg-info/requires.txt +45 -0
  27. rdfsolve-0.0.1/src/rdfsolve.egg-info/top_level.txt +1 -0
  28. rdfsolve-0.0.1/tests/__init__.py +1 -0
  29. rdfsolve-0.0.1/tests/test_bioregistry_prefixes.py +214 -0
  30. rdfsolve-0.0.1/tests/test_data/aopwikirdf_generated_void.ttl +608 -0
  31. rdfsolve-0.0.1/tests/test_data/aopwikirdf_linkml_schema.yaml +643 -0
  32. rdfsolve-0.0.1/tests/test_data/aopwikirdf_pattern_coverage.csv +97 -0
  33. rdfsolve-0.0.1/tests/test_data/aopwikirdf_schema.csv +97 -0
  34. rdfsolve-0.0.1/tests/test_data/aopwikirdf_schema.json +1687 -0
  35. rdfsolve-0.0.1/tests/test_data/aopwikirdf_schema.jsonld +412 -0
  36. rdfsolve-0.0.1/tests/test_data/cache/aopwikirdf_frequencies_basic.pkl +0 -0
  37. rdfsolve-0.0.1/tests/test_data/cache/aopwikirdf_frequencies_with_instances.pkl +0 -0
  38. rdfsolve-0.0.1/tests/test_data/cache/aopwikirdf_jsonld_schema.pkl +0 -0
  39. rdfsolve-0.0.1/tests/test_data/cache/aopwikirdf_voidparser.pkl +0 -0
  40. rdfsolve-0.0.1/tests/test_frequency_count.py +151 -0
  41. rdfsolve-0.0.1/tests/test_parser.py +301 -0
rdfsolve-0.0.1/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Javier Millán Acosta
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,13 @@
1
+ graft src
2
+ graft tests
3
+ prune scripts
4
+ prune notebooks
5
+ prune tests/.pytest_cache
6
+ prune docs
7
+
8
+ global-exclude *.py[cod] __pycache__ *.so *.dylib .DS_Store *.gpickle .idea/**
9
+
10
+ include README.md LICENSE Makefile
11
+ recursive-include data *.csv
12
+ recursive-include scripts *.sh
13
+ exclude tox.ini .readthedocs.yml .cruft.json CITATION.cff docker-compose.yml Dockerfile noxfile.py
@@ -0,0 +1,439 @@
1
+ # Makefile for rdfsolve notebook generation
2
+ # Can be used both locally and in GitHub Actions
3
+
4
+ # Variables - can be overridden by environment or command line
5
+ DATASET ?=
6
+ NOTEBOOK_TYPE ?= all
7
+ VENV_DIR ?= .venv
8
+ # Absolute paths so the tools still resolve after `cd`; $(CURDIR) is built in,
+ # whereas `?=` is recursive-flavoured and would re-run $(shell pwd) on every expansion.
9
+ PYTHON ?= $(CURDIR)/$(VENV_DIR)/bin/python
10
+ PIP ?= $(CURDIR)/$(VENV_DIR)/bin/pip
11
+ JUPYTER ?= $(CURDIR)/$(VENV_DIR)/bin/jupyter
12
+ MAX_PARALLEL ?= 50
13
+ GITHUB_OUTPUT ?= /dev/null
14
+ GITHUB_STEP_SUMMARY ?= /dev/null
15
+
16
+ # Directories
17
+ NOTEBOOKS_DIR := notebooks
18
+ DOCS_DIR := docs
19
+ DOCS_NOTEBOOKS_DIR := $(DOCS_DIR)/notebooks
20
+ DOCS_DATA_DIR := $(DOCS_DIR)/data/schema_extraction
21
+ ARTIFACTS_DIR := artifacts
22
+
23
+ SCHEMA_NB_DIR := $(NOTEBOOKS_DIR)/01_schema_extraction
24
+ PYDANTIC_NB_DIR := $(NOTEBOOKS_DIR)/02_pydantic_models
25
+ NAMESPACE_NB_DIR := $(NOTEBOOKS_DIR)/03_bioregistry_namespaces
26
+
27
+ SCHEMA_DOCS_DIR := $(DOCS_NOTEBOOKS_DIR)/01_schema_extraction
28
+ PYDANTIC_DOCS_DIR := $(DOCS_NOTEBOOKS_DIR)/02_pydantic_models
29
+ NAMESPACE_DOCS_DIR := $(DOCS_NOTEBOOKS_DIR)/03_bioregistry_namespaces
30
+
31
+ # Colors for output
32
+ COLOR_RESET := \033[0m
33
+ COLOR_BOLD := \033[1m
34
+ COLOR_GREEN := \033[32m
35
+ COLOR_YELLOW := \033[33m
36
+ COLOR_BLUE := \033[34m
37
+
38
+ .PHONY: help
39
+ help: ## Show this help message
40
+ @echo "$(COLOR_BOLD)RDFSolve Notebook Generation Makefile$(COLOR_RESET)"
41
+ @echo ""
42
+ @echo "$(COLOR_BLUE)Usage:$(COLOR_RESET)"
43
+ @echo " make <target> [DATASET=<name>] [NOTEBOOK_TYPE=<type>]"
44
+ @echo ""
45
+ @echo "$(COLOR_BLUE)Targets:$(COLOR_RESET)"
46
+ @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf " $(COLOR_GREEN)%-30s$(COLOR_RESET) %s\n", $$1, $$2}'
47
+ @echo ""
48
+ @echo "$(COLOR_BLUE)Examples:$(COLOR_RESET)"
49
+ @echo " make all # Generate all notebooks for all datasets"
50
+ @echo " make schema DATASET=aopwikirdf # Generate schema notebook for one dataset"
51
+ @echo " make clean # Clean generated files"
52
+ @echo " make install-deps # Install Python dependencies"
53
+
54
+ .PHONY: all
55
+ all: install-deps setup ## Run complete notebook generation pipeline for DATASET or all datasets
56
+ @if [ -n "$(DATASET)" ]; then \
57
+ echo "$(COLOR_BLUE)Running pipeline for single dataset: $(DATASET)$(COLOR_RESET)"; \
58
+ $(MAKE) schema DATASET=$(DATASET); \
59
+ $(MAKE) pydantic DATASET=$(DATASET); \
60
+ $(MAKE) namespace DATASET=$(DATASET); \
61
+ else \
62
+ echo "$(COLOR_BLUE)Running pipeline for all datasets$(COLOR_RESET)"; \
63
+ $(MAKE) schema-all; \
64
+ $(MAKE) pydantic-all; \
65
+ $(MAKE) namespace-all; \
66
+ fi
67
+ @$(MAKE) collect
68
+
69
+ .PHONY: setup
70
+ setup: ## Create necessary directories
71
+ @echo "$(COLOR_BLUE)Creating output directories...$(COLOR_RESET)"
72
+ @mkdir -p $(SCHEMA_DOCS_DIR)
73
+ @mkdir -p $(PYDANTIC_DOCS_DIR)
74
+ @mkdir -p $(NAMESPACE_DOCS_DIR)
75
+ @mkdir -p $(DOCS_DATA_DIR)
76
+ @mkdir -p $(SCHEMA_NB_DIR)
77
+ @mkdir -p $(PYDANTIC_NB_DIR)
78
+ @mkdir -p $(NAMESPACE_NB_DIR)
79
+ @echo "$(COLOR_GREEN)✓ Directories created$(COLOR_RESET)"
80
+
81
+ .PHONY: venv
82
+ venv: ## Create virtual environment with uv (Python 3.10+)
83
+ @if [ -d "$(VENV_DIR)" ]; then \
84
+ echo "$(COLOR_GREEN)✓ Virtual environment already exists at $(VENV_DIR)$(COLOR_RESET)"; \
85
+ else \
86
+ echo "$(COLOR_BLUE)Creating virtual environment with uv...$(COLOR_RESET)"; \
87
+ if command -v uv >/dev/null 2>&1; then \
88
+ uv venv $(VENV_DIR) --python 3.10; \
89
+ echo "$(COLOR_GREEN)✓ Virtual environment created at $(VENV_DIR)$(COLOR_RESET)"; \
90
+ else \
91
+ echo "$(COLOR_YELLOW)Error: uv not found. Please install it first:$(COLOR_RESET)"; \
92
+ echo " curl -LsSf https://astral.sh/uv/install.sh | sh"; \
93
+ exit 1; \
94
+ fi; \
95
+ fi
96
+
97
+ .PHONY: check-venv
98
+ check-venv: venv ## Ensure venv exists and its Python is >= 3.10 (tuple compare; bc float compare would accept 3.9)
99
+ @if [ ! -f "$(PYTHON)" ]; then \
100
+ echo "$(COLOR_YELLOW)Virtual environment Python not found, recreating...$(COLOR_RESET)"; \
101
+ rm -rf $(VENV_DIR); \
102
+ $(MAKE) venv; \
103
+ fi
104
+ @python_version=$$($(PYTHON) -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")' 2>/dev/null || echo "0.0"); \
105
+ if ! $(PYTHON) -c 'import sys; sys.exit(0 if sys.version_info >= (3, 10) else 1)' 2>/dev/null; then \
106
+ echo "$(COLOR_YELLOW)Error: Python $$python_version found in venv, but >= 3.10 required$(COLOR_RESET)"; \
107
+ echo "Recreating virtual environment..."; \
108
+ rm -rf $(VENV_DIR); \
109
+ $(MAKE) venv; \
110
+ else \
111
+ echo "$(COLOR_GREEN)✓ Using Python $$python_version from $(VENV_DIR)$(COLOR_RESET)"; \
112
+ fi
113
+
114
+ .PHONY: install-deps
115
+ install-deps: check-venv ## Install Python dependencies with uv
116
+ @echo "$(COLOR_BLUE)Installing Python dependencies with uv...$(COLOR_RESET)"
117
+ @if command -v uv >/dev/null 2>&1; then \
118
+ uv pip install -e .[notebooks]; \
119
+ uv pip install jupyter nbconvert pandas; \
120
+ else \
121
+ echo "$(COLOR_YELLOW)uv not found, using pip...$(COLOR_RESET)"; \
122
+ $(PYTHON) -m pip install --upgrade pip; \
123
+ $(PIP) install -e .[notebooks]; \
124
+ $(PIP) install jupyter nbconvert pandas; \
125
+ fi
126
+ @echo "$(COLOR_GREEN)✓ Dependencies installed$(COLOR_RESET)"
127
+
128
+ .PHONY: install-system-deps
129
+ install-system-deps: ## Install system dependencies (requires sudo)
130
+ @echo "$(COLOR_BLUE)Installing system dependencies...$(COLOR_RESET)"
131
+ sudo apt-get update
132
+ sudo apt-get install -y pandoc
133
+ @echo "$(COLOR_GREEN)✓ System dependencies installed$(COLOR_RESET)"
134
+
135
+ .PHONY: list-datasets
136
+ list-datasets: ## List all available datasets from sources.csv
137
+ @echo "$(COLOR_BLUE)Available datasets:$(COLOR_RESET)"
138
+ @tail -n +2 data/sources.csv | cut -d',' -f1 | grep -v '^$$' | sort
139
+
140
+ .PHONY: prepare-matrix
141
+ prepare-matrix: ## Generate dataset matrix (for CI)
142
+ @echo "$(COLOR_BLUE)Generating dataset matrix...$(COLOR_RESET)"
143
+ @datasets=$$(tail -n +2 data/sources.csv | cut -d',' -f1 | grep -v '^$$' | jq -R . | jq -s . | jq -c .); \
144
+ echo "schema-matrix={\"dataset\":$$datasets}" >> $(GITHUB_OUTPUT); \
145
+ echo "pydantic-matrix={\"dataset\":$$datasets}" >> $(GITHUB_OUTPUT); \
146
+ echo "namespace-matrix={\"dataset\":$$datasets}" >> $(GITHUB_OUTPUT); \
147
+ dataset_count=$$(echo $$datasets | jq length); \
148
+ echo "Generated matrix with $$dataset_count datasets"; \
149
+ echo "Notebook type requested: $(NOTEBOOK_TYPE)"
150
+
151
+ .PHONY: generate-schema-nb
152
+ generate-schema-nb: ## Generate schema notebook for DATASET
153
+ @if [ -z "$(DATASET)" ]; then \
154
+ echo "$(COLOR_YELLOW)Error: DATASET not specified$(COLOR_RESET)"; \
155
+ echo "Usage: make generate-schema-nb DATASET=<name>"; \
156
+ exit 1; \
157
+ fi
158
+ @echo "$(COLOR_BLUE)Generating schema notebook for dataset: $(DATASET)$(COLOR_RESET)"
159
+ cd $(NOTEBOOKS_DIR) && $(PYTHON) make_notebooks.py --dataset "$(DATASET)" --type schema
160
+ @echo "$(COLOR_GREEN)✓ Schema notebook generated for $(DATASET)$(COLOR_RESET)"
161
+
162
+ .PHONY: execute-schema-nb
163
+ execute-schema-nb: setup ## Execute and convert schema notebook to HTML
164
+ @if [ -z "$(DATASET)" ]; then \
165
+ echo "$(COLOR_YELLOW)Error: DATASET not specified$(COLOR_RESET)"; \
166
+ exit 1; \
167
+ fi
168
+ @echo "$(COLOR_BLUE)Executing schema notebook for $(DATASET)...$(COLOR_RESET)"
169
+ @cd $(SCHEMA_NB_DIR) && \
170
+ notebook="$(DATASET)_schema.ipynb" && \
171
+ if $(JUPYTER) nbconvert \
172
+ --execute \
173
+ --to html \
174
+ "$$notebook" \
175
+ --output-dir ../../$(SCHEMA_DOCS_DIR) \
176
+ --ExecutePreprocessor.kernel_name=python3 ; then \
177
+ echo "SUCCESS=true" >> $(GITHUB_OUTPUT); \
178
+ echo "$(COLOR_GREEN)✓ Successfully converted: $$notebook$(COLOR_RESET)"; \
179
+ echo "**$(DATASET)**: Schema analysis completed successfully" >> $(GITHUB_STEP_SUMMARY); \
180
+ else \
181
+ echo "SUCCESS=false" >> $(GITHUB_OUTPUT); \
182
+ echo "$(COLOR_YELLOW)⚠ Failed to convert: $$notebook$(COLOR_RESET)"; \
183
+ echo "**$(DATASET)**: Schema analysis failed (timeout or error)" >> $(GITHUB_STEP_SUMMARY); \
184
+ $(MAKE) create-schema-error-report; \
185
+ fi
186
+
187
+ .PHONY: create-schema-error-report
188
+ create-schema-error-report: ## Create error report HTML for failed schema notebook
189
+ @echo "$(COLOR_YELLOW)Creating error report for $(DATASET)...$(COLOR_RESET)"
190
+ @# Each recipe line runs in its own shell, so a heredoc cannot span recipe
+ @# lines; write the report with a single printf command instead.
191
+ @printf '%s\n' \
192
+ '<!DOCTYPE html>' \
193
+ '<html>' \
194
+ '<head>' \
195
+ '<title>Schema Analysis Failed - $(DATASET)</title>' \
196
+ '</head>' \
197
+ '<body>' \
198
+ '<h1>Schema Analysis Failed</h1>' \
199
+ '<h2>Dataset Information</h2>' \
200
+ '<p><strong>Dataset:</strong> $(DATASET)</p>' \
201
+ '<p><strong>Attempted:</strong> <script>document.write(new Date().toUTCString())</script></p>' \
202
+ '<h2>Manual Execution</h2>' \
203
+ '<pre>cd notebooks' \
204
+ 'python make_notebooks.py --dataset $(DATASET) --type schema' \
205
+ 'cd 01_schema_extraction' \
206
+ 'jupyter nbconvert --execute --to html $(DATASET)_schema.ipynb</pre>' \
207
+ '</body>' \
208
+ '</html>' \
209
+ > "$(SCHEMA_DOCS_DIR)/$(DATASET)_schema.html"
208
+
209
+ .PHONY: schema
210
+ schema: install-deps setup generate-schema-nb execute-schema-nb ## Generate and execute schema notebook for DATASET
211
+
212
+ .PHONY: schema-all
213
+ schema-all: ## Generate and execute schema notebooks for all datasets
214
+ @echo "$(COLOR_BLUE)Processing all datasets for schema notebooks...$(COLOR_RESET)"
215
+ @for dataset in $$(tail -n +2 data/sources.csv | cut -d',' -f1 | grep -v '^$$'); do \
216
+ echo "$(COLOR_BLUE)Processing: $$dataset$(COLOR_RESET)"; \
217
+ $(MAKE) schema DATASET=$$dataset || echo "$(COLOR_YELLOW)⚠ Failed: $$dataset$(COLOR_RESET)"; \
218
+ done
219
+
220
+ .PHONY: generate-pydantic-nb
221
+ generate-pydantic-nb: ## Generate pydantic notebook for DATASET
222
+ @if [ -z "$(DATASET)" ]; then \
223
+ echo "$(COLOR_YELLOW)Error: DATASET not specified$(COLOR_RESET)"; \
224
+ exit 1; \
225
+ fi
226
+ @echo "$(COLOR_BLUE)Generating pydantic notebook for dataset: $(DATASET)$(COLOR_RESET)"
227
+ cd $(NOTEBOOKS_DIR) && $(PYTHON) make_notebooks.py --dataset "$(DATASET)" --type pydantic
228
+ @echo "$(COLOR_GREEN)✓ Pydantic notebook generated for $(DATASET)$(COLOR_RESET)"
229
+
230
+ .PHONY: execute-pydantic-nb
231
+ execute-pydantic-nb: setup ## Execute and convert pydantic notebook to HTML
232
+ @if [ -z "$(DATASET)" ]; then \
233
+ echo "$(COLOR_YELLOW)Error: DATASET not specified$(COLOR_RESET)"; \
234
+ exit 1; \
235
+ fi
236
+ @echo "$(COLOR_BLUE)Executing pydantic notebook for $(DATASET)...$(COLOR_RESET)"
237
+ @cd $(PYDANTIC_NB_DIR) && \
238
+ notebook="$(DATASET)_pydantic.ipynb" && \
239
+ if $(JUPYTER) nbconvert \
240
+ --execute \
241
+ --to html \
242
+ "$$notebook" \
243
+ --output-dir ../../$(PYDANTIC_DOCS_DIR) \
244
+ --ExecutePreprocessor.kernel_name=python3 ; then \
245
+ echo "SUCCESS=true" >> $(GITHUB_OUTPUT); \
246
+ echo "$(COLOR_GREEN)✓ Successfully converted: $$notebook$(COLOR_RESET)"; \
247
+ echo "**$(DATASET)**: Pydantic model generation completed successfully" >> $(GITHUB_STEP_SUMMARY); \
248
+ else \
249
+ echo "SUCCESS=false" >> $(GITHUB_OUTPUT); \
250
+ echo "$(COLOR_YELLOW)⚠ Failed to convert: $$notebook$(COLOR_RESET)"; \
251
+ echo "**$(DATASET)**: Pydantic model generation failed (timeout or error)" >> $(GITHUB_STEP_SUMMARY); \
252
+ $(MAKE) create-pydantic-error-report; \
253
+ fi
254
+
255
+ .PHONY: create-pydantic-error-report
256
+ create-pydantic-error-report: ## Create error report HTML for failed pydantic notebook
257
+ @echo "$(COLOR_YELLOW)Creating error report for $(DATASET)...$(COLOR_RESET)"
258
+ @# Each recipe line runs in its own shell, so a heredoc cannot span recipe
+ @# lines; write the report with a single printf command instead.
259
+ @printf '%s\n' \
260
+ '<!DOCTYPE html>' \
261
+ '<html>' \
262
+ '<head>' \
263
+ '<title>Pydantic Generation Failed - $(DATASET)</title>' \
264
+ '</head>' \
265
+ '<body>' \
266
+ '<h1>Pydantic Generation Failed</h1>' \
267
+ '<h2>Dataset Information</h2>' \
268
+ '<p><strong>Dataset:</strong> $(DATASET)</p>' \
269
+ '<p><strong>Attempted:</strong> <script>document.write(new Date().toUTCString())</script></p>' \
270
+ '<h2>Manual Execution</h2>' \
271
+ '<pre>cd notebooks' \
272
+ 'python make_notebooks.py --dataset $(DATASET) --type pydantic' \
273
+ 'cd 02_pydantic_models' \
274
+ 'jupyter nbconvert --execute --to html $(DATASET)_pydantic.ipynb</pre>' \
275
+ '</body>' \
276
+ '</html>' \
277
+ > "$(PYDANTIC_DOCS_DIR)/$(DATASET)_pydantic.html"
277
+
278
+ .PHONY: pydantic
279
+ pydantic: generate-pydantic-nb execute-pydantic-nb ## Generate and execute pydantic notebook for DATASET
280
+
281
+ .PHONY: pydantic-all
282
+ pydantic-all: ## Generate and execute pydantic notebooks for all datasets
283
+ @echo "$(COLOR_BLUE)Processing all datasets for pydantic notebooks...$(COLOR_RESET)"
284
+ @for dataset in $$(tail -n +2 data/sources.csv | cut -d',' -f1 | grep -v '^$$'); do \
285
+ echo "$(COLOR_BLUE)Processing: $$dataset$(COLOR_RESET)"; \
286
+ $(MAKE) pydantic DATASET=$$dataset || echo "$(COLOR_YELLOW)⚠ Failed: $$dataset$(COLOR_RESET)"; \
287
+ done
288
+
289
+ .PHONY: generate-namespace-nb
290
+ generate-namespace-nb: ## Generate namespace notebook for DATASET
291
+ @if [ -z "$(DATASET)" ]; then \
292
+ echo "$(COLOR_YELLOW)Error: DATASET not specified$(COLOR_RESET)"; \
293
+ exit 1; \
294
+ fi
295
+ @echo "$(COLOR_BLUE)Generating namespace notebook for dataset: $(DATASET)$(COLOR_RESET)"
296
+ cd $(NOTEBOOKS_DIR) && $(PYTHON) make_notebooks.py --dataset "$(DATASET)" --type namespace
297
+ @echo "$(COLOR_GREEN)✓ Namespace notebook generated for $(DATASET)$(COLOR_RESET)"
298
+
299
+ .PHONY: execute-namespace-nb
300
+ execute-namespace-nb: setup ## Execute and convert namespace notebook to HTML
301
+ @if [ -z "$(DATASET)" ]; then \
302
+ echo "$(COLOR_YELLOW)Error: DATASET not specified$(COLOR_RESET)"; \
303
+ exit 1; \
304
+ fi
305
+ @echo "$(COLOR_BLUE)Executing namespace notebook for $(DATASET)...$(COLOR_RESET)"
306
+ @cd $(NAMESPACE_NB_DIR) && \
307
+ notebook="$(DATASET)_namespaces.ipynb" && \
308
+ if $(JUPYTER) nbconvert \
309
+ --execute \
310
+ --to html \
311
+ "$$notebook" \
312
+ --output-dir ../../$(NAMESPACE_DOCS_DIR) \
313
+ --ExecutePreprocessor.kernel_name=python3 ; then \
314
+ echo "SUCCESS=true" >> $(GITHUB_OUTPUT); \
315
+ echo "$(COLOR_GREEN)✓ Successfully converted: $$notebook$(COLOR_RESET)"; \
316
+ echo "**$(DATASET)**: Namespace discovery completed successfully" >> $(GITHUB_STEP_SUMMARY); \
317
+ else \
318
+ echo "SUCCESS=false" >> $(GITHUB_OUTPUT); \
319
+ echo "$(COLOR_YELLOW)⚠ Failed to convert: $$notebook$(COLOR_RESET)"; \
320
+ echo "**$(DATASET)**: Namespace discovery failed (timeout or error)" >> $(GITHUB_STEP_SUMMARY); \
321
+ $(MAKE) create-namespace-error-report; \
322
+ fi
323
+
324
+ .PHONY: create-namespace-error-report
325
+ create-namespace-error-report: ## Create error report HTML for failed namespace notebook
326
+ @echo "$(COLOR_YELLOW)Creating error report for $(DATASET)...$(COLOR_RESET)"
327
+ @# Each recipe line runs in its own shell, so a heredoc cannot span recipe
+ @# lines; write the report with a single printf command instead.
328
+ @printf '%s\n' \
329
+ '<!DOCTYPE html>' \
330
+ '<html>' \
331
+ '<head>' \
332
+ '<title>Namespace Discovery Failed - $(DATASET)</title>' \
333
+ '</head>' \
334
+ '<body>' \
335
+ '<h1>Namespace Discovery Failed</h1>' \
336
+ '<h2>Dataset Information</h2>' \
337
+ '<p><strong>Dataset:</strong> $(DATASET)</p>' \
338
+ '<p><strong>Attempted:</strong> <script>document.write(new Date().toUTCString())</script></p>' \
339
+ '<h2>Manual Execution</h2>' \
340
+ '<pre>cd notebooks' \
341
+ 'python make_notebooks.py --dataset $(DATASET) --type namespace' \
342
+ 'cd 03_bioregistry_namespaces' \
343
+ 'jupyter nbconvert --execute --to html $(DATASET)_namespaces.ipynb</pre>' \
344
+ '</body>' \
345
+ '</html>' \
346
+ > "$(NAMESPACE_DOCS_DIR)/$(DATASET)_namespaces.html"
346
+
347
+ .PHONY: namespace
348
+ namespace: generate-namespace-nb execute-namespace-nb ## Generate and execute namespace notebook for DATASET
349
+
350
+ .PHONY: namespace-all
351
+ namespace-all: ## Generate and execute namespace notebooks for all datasets
352
+ @echo "$(COLOR_BLUE)Processing all datasets for namespace notebooks...$(COLOR_RESET)"
353
+ @for dataset in $$(tail -n +2 data/sources.csv | cut -d',' -f1 | grep -v '^$$'); do \
354
+ echo "$(COLOR_BLUE)Processing: $$dataset$(COLOR_RESET)"; \
355
+ $(MAKE) namespace DATASET=$$dataset || echo "$(COLOR_YELLOW)⚠ Failed: $$dataset$(COLOR_RESET)"; \
356
+ done
357
+
358
+ .PHONY: collect
359
+ collect: ## Collect and organize all generated results
360
+ @echo "$(COLOR_BLUE)Collecting and organizing results...$(COLOR_RESET)"
361
+ @$(MAKE) setup
362
+ @if [ -d "$(ARTIFACTS_DIR)" ] && [ -n "$$(ls -A $(ARTIFACTS_DIR)/ 2>/dev/null)" ]; then \
363
+ echo "$(COLOR_BLUE)Collecting artifacts from matrix jobs...$(COLOR_RESET)"; \
364
+ $(MAKE) collect-artifacts; \
365
+ else \
366
+ echo "$(COLOR_YELLOW)No artifacts directory found, skipping artifact collection$(COLOR_RESET)"; \
367
+ fi
368
+ @$(MAKE) count-results
369
+ @$(MAKE) generate-results-json
370
+ @echo "$(COLOR_GREEN)✓ Results collected$(COLOR_RESET)"
371
+
372
+ .PHONY: collect-artifacts
373
+ collect-artifacts: ## Collect artifacts from CI runs
374
+ @echo "$(COLOR_BLUE)Copying artifacts...$(COLOR_RESET)"
375
+ @# Schema notebooks and HTML
376
+ @find $(ARTIFACTS_DIR)/ -name "*_schema.ipynb" -exec cp {} $(SCHEMA_NB_DIR)/ \; 2>/dev/null || true
377
+ @find $(ARTIFACTS_DIR)/ -name "*_schema.html" -exec cp {} $(SCHEMA_DOCS_DIR)/ \; 2>/dev/null || true
378
+ @# Pydantic notebooks and HTML
379
+ @find $(ARTIFACTS_DIR)/ -name "*_pydantic.ipynb" -exec cp {} $(PYDANTIC_NB_DIR)/ \; 2>/dev/null || true
380
+ @find $(ARTIFACTS_DIR)/ -name "*_pydantic.html" -exec cp {} $(PYDANTIC_DOCS_DIR)/ \; 2>/dev/null || true
381
+ @# Namespace notebooks and HTML
382
+ @find $(ARTIFACTS_DIR)/ -name "*_namespaces.ipynb" -exec cp {} $(NAMESPACE_NB_DIR)/ \; 2>/dev/null || true
383
+ @find $(ARTIFACTS_DIR)/ -name "*_namespaces.html" -exec cp {} $(NAMESPACE_DOCS_DIR)/ \; 2>/dev/null || true
384
+ @# Data files
385
+ @find $(ARTIFACTS_DIR)/ -path "*/data/schema_extraction/*" -type f \
386
+ \( -name "*.jsonld" -o -name "*.yaml" -o -name "*.csv" -o -name "*.ttl" -o -name "*.nq" -o -name "*.parquet" -o -name "*.json" -o -name "*.jsonl" \) \
387
+ -exec bash -c 'file="$$1"; relative_path="$${file#*/data/schema_extraction/}"; dataset_name=$$(echo "$$relative_path" | cut -d/ -f1); filename=$$(basename "$$relative_path"); mkdir -p "$(DOCS_DATA_DIR)/$$dataset_name"; cp "$$file" "$(DOCS_DATA_DIR)/$$dataset_name/$$filename"' _ {} \; 2>/dev/null || true
388
+
389
+ .PHONY: count-results
390
+ count-results: ## Count generated files
391
+ @echo "$(COLOR_BLUE)Final collection results:$(COLOR_RESET)"
392
+ @schema_nb=$$(find $(SCHEMA_NB_DIR)/ -name "*_schema.ipynb" 2>/dev/null | wc -l); \
393
+ schema_html=$$(find $(SCHEMA_DOCS_DIR)/ -name "*_schema.html" 2>/dev/null | wc -l); \
394
+ pydantic_nb=$$(find $(PYDANTIC_NB_DIR)/ -name "*_pydantic.ipynb" 2>/dev/null | wc -l); \
395
+ pydantic_html=$$(find $(PYDANTIC_DOCS_DIR)/ -name "*_pydantic.html" 2>/dev/null | wc -l); \
396
+ namespace_nb=$$(find $(NAMESPACE_NB_DIR)/ -name "*_namespaces.ipynb" 2>/dev/null | wc -l); \
397
+ namespace_html=$$(find $(NAMESPACE_DOCS_DIR)/ -name "*_namespaces.html" 2>/dev/null | wc -l); \
398
+ data_files=$$(find $(DOCS_DATA_DIR)/ -type f 2>/dev/null | wc -l); \
399
+ echo " Schema notebooks: $$schema_nb"; \
400
+ echo " Schema HTML files: $$schema_html"; \
401
+ echo " Pydantic notebooks: $$pydantic_nb"; \
402
+ echo " Pydantic HTML files: $$pydantic_html"; \
403
+ echo " Namespace notebooks: $$namespace_nb"; \
404
+ echo " Namespace HTML files: $$namespace_html"; \
405
+ echo " Data files: $$data_files"
406
+
407
+ .PHONY: generate-results-json
408
+ generate-results-json: ## Generate results.json for web interface
409
+ @echo "$(COLOR_BLUE)Generating results.json...$(COLOR_RESET)"
410
+ @bash scripts/generate_results_json.sh
411
+
412
+ .PHONY: clean
413
+ clean: ## Remove generated notebooks and HTML files
414
+ @echo "$(COLOR_YELLOW)Cleaning generated files...$(COLOR_RESET)"
415
+ @rm -f $(SCHEMA_NB_DIR)/*_schema.ipynb
416
+ @rm -f $(SCHEMA_DOCS_DIR)/*_schema.html
417
+ @rm -f $(PYDANTIC_NB_DIR)/*_pydantic.ipynb
418
+ @rm -f $(PYDANTIC_DOCS_DIR)/*_pydantic.html
419
+ @rm -f $(NAMESPACE_NB_DIR)/*_namespaces.ipynb
420
+ @rm -f $(NAMESPACE_DOCS_DIR)/*_namespaces.html
421
+ @rm -f $(DOCS_DIR)/results.json
422
+ @echo "$(COLOR_GREEN)✓ Cleaned$(COLOR_RESET)"
423
+
424
+ .PHONY: clean-all
425
+ clean-all: clean ## Remove all generated files including data
426
+ @echo "$(COLOR_YELLOW)Cleaning all generated files including data...$(COLOR_RESET)"
427
+ @rm -rf $(DOCS_DATA_DIR)/*
428
+ @rm -rf $(ARTIFACTS_DIR)
429
+ @echo "$(COLOR_GREEN)✓ All cleaned$(COLOR_RESET)"
430
+
431
+ .PHONY: test-one
432
+ test-one: ## Quick test with one dataset (usage: make test-one DATASET=aopwikirdf)
433
+ @if [ -z "$(DATASET)" ]; then \
434
+ DATASET=$$(tail -n +2 data/sources.csv | cut -d',' -f1 | grep -v '^$$' | head -1); \
435
+ echo "$(COLOR_BLUE)No DATASET specified, using first dataset: $$DATASET$(COLOR_RESET)"; \
436
+ $(MAKE) schema DATASET=$$DATASET; \
437
+ else \
438
+ $(MAKE) schema DATASET=$(DATASET); \
439
+ fi