@aborruso/ckan-mcp-server 0.4.13 → 0.4.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,299 @@
+ #!/usr/bin/env python3
+ """
+ Metadata Quality Scoring for CKAN Datasets
+
+ Advanced quality scoring system based on:
+ - Completeness (required and recommended fields)
+ - Richness (descriptions, tags, temporal coverage)
+ - Resource quality (formats, accessibility)
+ - Temporal freshness
+
+ Score: 0-100 points
+ """
+
+ from datetime import datetime
+ from typing import Any
+
+
+ class MetadataQualityScorer:
+     """Calculate metadata quality score for CKAN datasets."""
+
+     # Quality thresholds
+     EXCELLENT = 80
+     GOOD = 60
+     ACCEPTABLE = 40
+     POOR = 0
+
+     @classmethod
+     def score_dataset(cls, dataset: dict[str, Any]) -> dict[str, Any]:
+         """
+         Calculate comprehensive quality score.
+
+         Returns:
+             {
+                 "score": 75,        # Total score 0-100
+                 "level": "good",    # excellent/good/acceptable/poor
+                 "breakdown": {
+                     "completeness": 20,  # out of 30
+                     "richness": 15,      # out of 30
+                     "resources": 25,     # out of 30
+                     "freshness": 8       # out of 10
+                 },
+                 "issues": ["Missing license", ...]
+             }
+         """
+         issues = []
+         breakdown = {
+             "completeness": cls._score_completeness(dataset, issues),
+             "richness": cls._score_richness(dataset, issues),
+             "resources": cls._score_resources(dataset, issues),
+             "freshness": cls._score_freshness(dataset, issues),
+         }
+
+         total_score = sum(breakdown.values())
+         level = cls._get_level(total_score)
+
+         return {
+             "score": total_score,
+             "level": level,
+             "breakdown": breakdown,
+             "issues": issues,
+         }
+
+     @classmethod
+     def _score_completeness(cls, dataset: dict, issues: list) -> int:
+         """Score 0-30: Required and recommended fields."""
+         score = 0
+
+         # Required fields (15 points)
+         if dataset.get("title"):
+             score += 5
+         else:
+             issues.append("Missing title")
+
+         if dataset.get("notes"):  # Description
+             score += 5
+         else:
+             issues.append("Missing description")
+
+         if dataset.get("name"):  # Identifier
+             score += 5
+         else:
+             issues.append("Missing identifier")
+
+         # Recommended fields (15 points)
+         if dataset.get("license_id"):
+             score += 3
+         else:
+             issues.append("Missing license")
+
+         if dataset.get("author") or dataset.get("maintainer"):
+             score += 3
+         else:
+             issues.append("Missing author/maintainer")
+
+         if dataset.get("author_email") or dataset.get("maintainer_email"):
+             score += 3
+         else:
+             issues.append("Missing contact email")
+
+         # Organization
+         if dataset.get("organization"):
+             score += 3
+         else:
+             issues.append("Not assigned to an organization")
+
+         # Geographical coverage
+         if dataset.get("extras"):
+             has_geo = any(
+                 e.get("key") in ["spatial", "geographic_coverage"]
+                 for e in dataset.get("extras", [])
+             )
+             if has_geo:
+                 score += 3
+
+         return score
+
+     @classmethod
+     def _score_richness(cls, dataset: dict, issues: list) -> int:
+         """Score 0-30: Richness of metadata."""
+         score = 0
+
+         # Description quality (10 points)
+         notes = dataset.get("notes", "")
+         if len(notes) > 200:
+             score += 10
+         elif len(notes) > 100:
+             score += 5
+         elif len(notes) > 0:
+             score += 2
+         else:
+             issues.append("Very short or missing description")
+
+         # Tags (10 points)
+         tags = dataset.get("tags", [])
+         num_tags = len(tags)
+         if num_tags >= 5:
+             score += 10
+         elif num_tags >= 3:
+             score += 6
+         elif num_tags >= 1:
+             score += 3
+         else:
+             issues.append("No tags")
+
+         # Temporal coverage (5 points)
+         extras = {e.get("key"): e.get("value") for e in dataset.get("extras", [])}
+         if "temporal_start" in extras or "temporal_end" in extras:
+             score += 5
+
+         # Frequency/update schedule (5 points)
+         if extras.get("frequency") or extras.get("update_frequency"):
+             score += 5
+
+         return score
+
+     @classmethod
+     def _score_resources(cls, dataset: dict, issues: list) -> int:
+         """Score 0-30: Resource quality."""
+         score = 0
+         resources = dataset.get("resources", [])
+
+         if not resources:
+             issues.append("No resources")
+             return 0
+
+         # At least one resource (5 points)
+         score += 5
+
+         # Check formats (10 points, plus a 2-point bonus for CSV)
+         formats = {r.get("format", "").upper() for r in resources}
+         open_formats = {"CSV", "JSON", "GEOJSON", "XML", "RDF", "JSONLD"}
+         if formats & open_formats:
+             score += 10
+             if "CSV" in formats:
+                 score += 2  # Bonus for CSV
+         else:
+             issues.append("No open formats (CSV/JSON/XML)")
+
+         # Resource descriptions (5 points)
+         described = sum(1 for r in resources if r.get("description"))
+         if described == len(resources):
+             score += 5
+         elif described > 0:
+             score += 2
+
+         # DataStore availability (5 points)
+         has_datastore = any(r.get("datastore_active") for r in resources)
+         if has_datastore:
+             score += 5
+
+         # URL validity (5 points)
+         valid_urls = sum(
+             1 for r in resources if r.get("url") and r["url"].startswith("http")
+         )
+         if valid_urls == len(resources):
+             score += 5
+         elif valid_urls > 0:
+             score += 2
+         else:
+             issues.append("Invalid or missing resource URLs")
+
+         # The CSV bonus can push the raw total to 32; cap at the documented maximum
+         return min(score, 30)
+
+     @classmethod
+     def _score_freshness(cls, dataset: dict, issues: list) -> int:
+         """Score 0-10: Temporal freshness."""
+         score = 0
+
+         # Check metadata_modified
+         modified_str = dataset.get("metadata_modified")
+         if not modified_str:
+             issues.append("No last modified date")
+             return 0
+
+         try:
+             modified = datetime.fromisoformat(modified_str.replace("Z", "+00:00"))
+             now = datetime.now(modified.tzinfo)
+             days_old = (now - modified).days
+
+             if days_old < 90:  # < 3 months
+                 score = 10
+             elif days_old < 180:  # < 6 months
+                 score = 7
+             elif days_old < 365:  # < 1 year
+                 score = 5
+             elif days_old < 730:  # < 2 years
+                 score = 3
+             else:
+                 score = 1
+                 issues.append(f"Last updated {days_old} days ago")
+
+         except (ValueError, AttributeError):
+             issues.append("Invalid date format")
+
+         return score
+
+     @classmethod
+     def _get_level(cls, score: int) -> str:
+         """Convert score to quality level."""
+         if score >= cls.EXCELLENT:
+             return "excellent"
+         elif score >= cls.GOOD:
+             return "good"
+         elif score >= cls.ACCEPTABLE:
+             return "acceptable"
+         else:
+             return "poor"
+
+
+ # Example usage
+ if __name__ == "__main__":
+     # Sample dataset
+     sample_dataset = {
+         "title": "Sample Dataset",
+         "name": "sample-dataset",
+         "notes": "This is a sample dataset with a detailed description " * 5,
+         "license_id": "cc-by-4.0",
+         "author": "Mario Rossi",
+         "author_email": "mario@example.com",
+         "organization": {"name": "comune-roma"},
+         "tags": [
+             {"name": "environment"},
+             {"name": "air-quality"},
+             {"name": "open-data"},
+         ],
+         "resources": [
+             {
+                 "format": "CSV",
+                 "url": "https://example.com/data.csv",
+                 "description": "Data in CSV format",
+                 "datastore_active": True,
+             },
+             {
+                 "format": "JSON",
+                 "url": "https://example.com/data.json",
+                 "description": "Data in JSON format",
+             },
+         ],
+         "metadata_modified": "2025-01-15T10:00:00Z",
+     }
+
+     scorer = MetadataQualityScorer()
+     result = scorer.score_dataset(sample_dataset)
+
+     print("Metadata Quality Assessment")
+     print("=" * 50)
+     print(f"Overall Score: {result['score']}/100")
+     print(f"Quality Level: {result['level'].upper()}")
+     print("\nBreakdown:")
+     for category, score in result["breakdown"].items():
+         max_points = 10 if category == "freshness" else 30
+         print(f"  {category.capitalize():15} {score:2}/{max_points}")
+     if result["issues"]:
+         print(f"\nIssues ({len(result['issues'])}):")
+         for issue in result["issues"]:
+             print(f"  - {issue}")
@@ -0,0 +1,12 @@
+ # LangGraph Examples - Python Dependencies
+
+ # Core dependencies
+ langgraph>=0.2.0
+ langchain-core>=0.3.0
+
+ # MCP Python SDK for client connection
+ mcp>=1.0.0
+
+ # Optional: LangSmith for debugging/tracing
+ # Uncomment if you want to use LangSmith
+ # langsmith>=0.1.0
@@ -0,0 +1,32 @@
+ #!/bin/bash
+ # Setup script for LangGraph examples
+
+ set -e
+
+ echo "Setting up LangGraph examples environment..."
+
+ # Check Python version
+ python3 --version
+
+ # Create virtual environment if it doesn't exist
+ if [ ! -d "venv" ]; then
+     echo "Creating virtual environment..."
+     python3 -m venv venv
+ fi
+
+ # Activate virtual environment
+ source venv/bin/activate
+
+ # Install dependencies
+ echo "Installing dependencies..."
+ pip install -r requirements.txt
+
+ echo ""
+ echo "✓ Setup complete!"
+ echo ""
+ echo "To activate the environment:"
+ echo "  source venv/bin/activate"
+ echo ""
+ echo "To run examples:"
+ echo "  python 01_basic_workflow.py"
+ echo "  python 02_data_exploration.py"
@@ -0,0 +1,106 @@
+ #!/usr/bin/env python3
+ """
+ Quick test to verify LangGraph + MCP setup
+
+ Run:
+     uv run --with langgraph --with mcp --with langchain-core python test_setup.py
+ """
+
+ import sys
+
+
+ def test_imports():
+     """Test that all required packages are available."""
+     print("Testing imports...")
+     errors = []
+
+     try:
+         import langgraph  # noqa: F401
+
+         print("✓ langgraph")
+     except ImportError as e:
+         errors.append(f"✗ langgraph: {e}")
+
+     try:
+         import mcp  # noqa: F401
+
+         print("✓ mcp")
+     except ImportError as e:
+         errors.append(f"✗ mcp: {e}")
+
+     try:
+         import langchain_core  # noqa: F401
+
+         print("✓ langchain_core")
+     except ImportError as e:
+         errors.append(f"✗ langchain_core: {e}")
+
+     return errors
+
+
+ def test_mcp_server():
+     """Test that MCP server file exists."""
+     import os
+
+     print("\nTesting MCP server...")
+     server_path = os.path.join(os.path.dirname(__file__), "../../dist/index.js")
+
+     if os.path.exists(server_path):
+         print(f"✓ MCP server found: {server_path}")
+         return []
+     else:
+         return [
+             f"✗ MCP server not found: {server_path}",
+             "  Run: cd ../.. && npm run build",
+         ]
+
+
+ def test_node():
+     """Test that Node.js is available."""
+     import subprocess
+
+     print("\nTesting Node.js...")
+     try:
+         result = subprocess.run(
+             ["node", "--version"], capture_output=True, text=True, check=True
+         )
+         version = result.stdout.strip()
+         print(f"✓ Node.js {version}")
+         return []
+     except (subprocess.CalledProcessError, FileNotFoundError):
+         return ["✗ Node.js not found or not in PATH"]
+
+
+ def main():
+     """Run all tests."""
+     print("=" * 60)
+     print("LangGraph + CKAN MCP Setup Test")
+     print("=" * 60)
+
+     all_errors = []
+
+     # Run tests
+     all_errors.extend(test_imports())
+     all_errors.extend(test_node())
+     all_errors.extend(test_mcp_server())
+
+     # Summary
+     print("\n" + "=" * 60)
+     if all_errors:
+         print("SETUP INCOMPLETE")
+         print("=" * 60)
+         for error in all_errors:
+             print(error)
+         print("\nSee README.md for setup instructions")
+         sys.exit(1)
+     else:
+         print("✓ ALL TESTS PASSED")
+         print("=" * 60)
+         print("\nYou can now run:")
+         print("  python 01_basic_workflow.py")
+         print("  python 02_data_exploration.py")
+         sys.exit(0)
+
+
+ if __name__ == "__main__":
+     main()
@@ -0,0 +1,21 @@
+ # Change: Add MQA Quality Score Tool for dati.gov.it
+
+ ## Why
+ Datasets on dati.gov.it display quality scores (Eccellente, Buono, etc.) calculated by data.europa.eu's MQA (Metadata Quality Assurance) system. Currently there is no way to access these quality metrics through the MCP server, which limits users' ability to evaluate dataset quality programmatically.
+
+ ## What Changes
+ - Add `ckan_get_mqa_quality` tool for retrieving quality metrics from data.europa.eu
+ - The tool works only with the dati.gov.it server (validated at runtime)
+ - Fetches the dataset identifier from CKAN, then queries the MQA API
+ - Returns the quality score and detailed metrics (accessibility, reusability, interoperability, findability)
+ - Supports both markdown and JSON output formats
+
+ ## Impact
+ - Affected specs: New capability `ckan-quality`
+ - Affected code:
+   - New file: `src/tools/quality.ts` (tool handler)
+   - New file: `tests/integration/quality.test.ts` (tests with mocked responses)
+   - New file: `tests/fixtures/responses/mqa-quality.json` (mock data)
+   - Modified: `src/server.ts` (register new tool)
+   - Modified: `README.md` (document new tool)
+   - Modified: `EXAMPLES.md` (add usage examples)
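The handler itself lands in `src/tools/quality.ts` (TypeScript), but the flow described above, resolving the dataset's `identifier` via CKAN `package_show` with a fallback to `name`, then querying the data.europa.eu MQA cache, can be sketched in Python. The endpoint is the one named in the implementation tasks further down; the function name and error handling are illustrative, not the package's actual code:

```python
import json
from urllib.request import urlopen

# Endpoint listed in task 1.4 of the implementation tasks
MQA_ENDPOINT = "https://data.europa.eu/api/mqa/cache/datasets/{id}"


def get_mqa_quality(server_url: str, dataset_id: str) -> dict:
    """Fetch MQA quality metrics for a dati.gov.it dataset (illustrative sketch)."""
    # Server validation (dati.gov.it only) omitted here; see the
    # Server Validation requirement in the spec below.

    # Step 1: resolve the MQA identifier via CKAN, falling back to `name`
    # when the identifier field is empty
    with urlopen(f"{server_url}/api/3/action/package_show?id={dataset_id}") as resp:
        pkg = json.load(resp)["result"]
    identifier = pkg.get("identifier") or pkg["name"]

    # Step 2: query the data.europa.eu MQA cache for that identifier
    with urlopen(MQA_ENDPOINT.format(id=identifier)) as resp:
        return json.load(resp)
```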
@@ -0,0 +1,71 @@
+ ## ADDED Requirements
+
+ ### Requirement: MQA Quality Score Retrieval
+ The system SHALL provide a tool to retrieve MQA (Metadata Quality Assurance) quality metrics from data.europa.eu for datasets published on dati.gov.it.
+
+ #### Scenario: Successful quality score retrieval
+ - **GIVEN** a valid dataset ID from dati.gov.it
+ - **WHEN** user requests quality metrics
+ - **THEN** system SHALL fetch identifier field from CKAN package_show
+ - **AND** system SHALL query data.europa.eu MQA API
+ - **AND** system SHALL return quality score and detailed metrics (accessibility, reusability, interoperability, findability)
+
+ #### Scenario: Identifier fallback to name
+ - **GIVEN** a dataset with empty identifier field
+ - **WHEN** user requests quality metrics
+ - **THEN** system SHALL use the name field as fallback identifier for MQA API query
+
+ #### Scenario: Dataset not found
+ - **GIVEN** an invalid or non-existent dataset ID
+ - **WHEN** user requests quality metrics
+ - **THEN** system SHALL return clear error message indicating dataset not found
+
+ #### Scenario: MQA API unavailable
+ - **GIVEN** data.europa.eu MQA API is unavailable or returns error
+ - **WHEN** user requests quality metrics
+ - **THEN** system SHALL return clear error message indicating MQA service unavailability
+
+ ### Requirement: Server Validation
+ The system SHALL restrict MQA quality queries to dati.gov.it server only.
+
+ #### Scenario: Valid dati.gov.it server
+ - **GIVEN** server_url is "https://www.dati.gov.it/opendata" or "https://dati.gov.it/opendata"
+ - **WHEN** user requests quality metrics
+ - **THEN** system SHALL proceed with MQA query
+
+ #### Scenario: Invalid server URL
+ - **GIVEN** server_url is not dati.gov.it (e.g., "https://catalog.data.gov")
+ - **WHEN** user requests quality metrics
+ - **THEN** system SHALL reject request with error message explaining MQA is only available for dati.gov.it
+
+ ### Requirement: Output Formats
+ The system SHALL support both markdown and JSON output formats for quality metrics.
+
+ #### Scenario: Markdown format (default)
+ - **GIVEN** user does not specify response_format or specifies "markdown"
+ - **WHEN** quality metrics are retrieved
+ - **THEN** system SHALL return human-readable markdown with:
+   - Overall quality score
+   - Breakdown by dimension (accessibility, reusability, interoperability, findability)
+   - Key findings and recommendations
+
+ #### Scenario: JSON format
+ - **GIVEN** user specifies response_format as "json"
+ - **WHEN** quality metrics are retrieved
+ - **THEN** system SHALL return complete MQA API response as structured JSON
+
+ ### Requirement: Tool Parameters
+ The system SHALL accept the following parameters for the MQA quality tool:
+ - server_url (required): Base URL of dati.gov.it portal
+ - dataset_id (required): Dataset ID or name
+ - response_format (optional): "markdown" (default) or "json"
+
+ #### Scenario: Minimal parameters
+ - **GIVEN** user provides only server_url and dataset_id
+ - **WHEN** tool is invoked
+ - **THEN** system SHALL use default markdown format
+
+ #### Scenario: All parameters specified
+ - **GIVEN** user provides server_url, dataset_id, and response_format
+ - **WHEN** tool is invoked
+ - **THEN** system SHALL use specified format for output
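The two accepted base URLs reduce the Server Validation requirement to a whitelist check. A minimal sketch, assuming trailing-slash normalization is sufficient; the constant and function names are illustrative, not the actual `src/tools/quality.ts` code:

```python
# The two base URLs accepted by the "Valid dati.gov.it server" scenario
VALID_SERVERS = {
    "https://www.dati.gov.it/opendata",
    "https://dati.gov.it/opendata",
}


def validate_server(server_url: str) -> None:
    """Reject any server other than dati.gov.it, per the requirement above."""
    if server_url.rstrip("/") not in VALID_SERVERS:
        raise ValueError(
            f"MQA quality metrics are only available for dati.gov.it (got: {server_url})"
        )
```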
@@ -0,0 +1,29 @@
+ # Implementation Tasks
+
+ ## 1. Core Implementation
+ - [x] 1.1 Create `src/tools/quality.ts` with `ckan_get_mqa_quality` tool handler
+ - [x] 1.2 Implement server URL validation (dati.gov.it only)
+ - [x] 1.3 Add CKAN package_show call to extract identifier field
+ - [x] 1.4 Add MQA API client (https://data.europa.eu/api/mqa/cache/datasets/{id})
+ - [x] 1.5 Implement markdown and JSON formatters for quality metrics
+ - [x] 1.6 Register tool in `src/server.ts`
+
+ ## 2. Testing
+ - [x] 2.1 Create mock fixtures for CKAN package_show response
+ - [x] 2.2 Create mock fixtures for MQA API response
+ - [x] 2.3 Write integration tests for successful quality retrieval
+ - [x] 2.4 Write tests for error scenarios (invalid server, dataset not found, MQA API unavailable)
+ - [x] 2.5 Write tests for fallback from identifier to name field
+ - [x] 2.6 Verify test coverage matches project standards
+
+ ## 3. Documentation
+ - [x] 3.1 Add tool description to README.md
+ - [x] 3.2 Add usage examples to EXAMPLES.md
+ - [x] 3.3 Document server restriction (dati.gov.it only)
+ - [x] 3.4 Document quality metrics structure (score, accessibility, reusability, interoperability, findability)
+
+ ## 4. Validation
+ - [x] 4.1 Run full test suite (npm test) - 212 tests passing
+ - [x] 4.2 Test manually with real dati.gov.it dataset
+ - [x] 4.3 Verify error handling for non-dati.gov.it servers
+ - [x] 4.4 Build project successfully (npm run build)
package/package.json CHANGED
@@ -1,6 +1,6 @@
    {
      "name": "@aborruso/ckan-mcp-server",
-     "version": "0.4.13",
+     "version": "0.4.15",
      "description": "MCP server for interacting with CKAN open data portals",
      "main": "dist/index.js",
      "type": "module",