PyPI - ADFMentor - Versions diffs - 0.3.0__tar.gz - Mend

ADFMentor 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

adfmentor-0.3.0/ADFMentor.egg-info/PKG-INFO +370 -0
adfmentor-0.3.0/ADFMentor.egg-info/SOURCES.txt +20 -0
adfmentor-0.3.0/ADFMentor.egg-info/dependency_links.txt +1 -0
adfmentor-0.3.0/ADFMentor.egg-info/requires.txt +2 -0
adfmentor-0.3.0/ADFMentor.egg-info/top_level.txt +3 -0
adfmentor-0.3.0/ADFmentor/__init__.py +3 -0
adfmentor-0.3.0/ADFmentor/core.py +121 -0
adfmentor-0.3.0/ADFmentor/models/__init__.py +9 -0
adfmentor-0.3.0/ADFmentor/models/gemini.py +80 -0
adfmentor-0.3.0/ADFmentor/models/model.py +63 -0
adfmentor-0.3.0/ADFmentor/utils/__init__.py +20 -0
adfmentor-0.3.0/ADFmentor/utils/checker.py +61 -0
adfmentor-0.3.0/ADFmentor/utils/extractor.py +56 -0
adfmentor-0.3.0/ADFmentor/utils/path_handler.py +28 -0
adfmentor-0.3.0/ADFmentor/utils/processor.py +461 -0
adfmentor-0.3.0/ADFmentor/utils/question_parser.py +41 -0
adfmentor-0.3.0/LICENSE +21 -0
adfmentor-0.3.0/PKG-INFO +370 -0
adfmentor-0.3.0/README.md +349 -0
adfmentor-0.3.0/pyproject.toml +34 -0
adfmentor-0.3.0/setup.cfg +4 -0
adfmentor-0.3.0/tests/test.py +71 -0

adfmentor-0.3.0/ADFMentor.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,370 @@
+Metadata-Version: 2.4
+Name: ADFMentor
+Version: 0.3.0
+Summary: Parse Azure Data Factory project files and evaluate them using AI models.
+Author-email: Qobiljon Xayrullayev <qobiljonkhayrullayev@gmail.com>
+License: MIT
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: google-genai>=1.0.0
+Requires-Dist: python-dotenv>=1.0.0
+Dynamic: license-file
+ADFMentor
+=========
+A Python package for parsing Azure Data Factory (ADF) project files and evaluating them using AI models like Google Gemini.
+## Features
+- 🏗️ **ADF Processing**: Parse ADF pipeline, dataset, linked service, trigger, and dataflow JSON files
+- **Pipeline** — Parse ADF pipeline JSON files (activities, dependencies, parameters)
+- **Written** — Evaluate text-based answers about ADF concepts
+- 📝 **Detailed Reports**: Generate comprehensive grading reports from ADF project structures
+- 📦 **ZIP Support**: Automatically handles ZIP file submissions — no manual extraction needed
+- 🗂️ **Flexible Inputs**: Accepts a directory, a ZIP, or a single file (`.json`, `.txt`)
+- 🔍 **Auto File Discovery**: Locates ADF resource folders (`pipeline/`, `dataset/`, `linkedService/`, etc.)
+- ⚠️ **Graceful Missing-File Scoring**: Missing required files yield a `0` score and clear feedback
+- 🧩 **Lesson Question Parser**: Parse structured lesson text into `pipeline` and `text` question blocks
+- 🔧 **Easy Integration**: Simple API for evaluating student assignments and projects
+## Installation
+### From Source
+```bash
+git clone https://github.com/yourusername/ADFMentor.git
+cd ADFMentor
+pip install -e .
+```
+### Using pip
+```bash
+pip install ADFMentor
+```
+## Quick Start
+```python
+from ADFmentor import ADFMentor
+# Initialize with your Gemini API key
+mentor = ADFMentor(api_key="your-api-key")
+# Evaluate a full submission (pipeline + written)
+# Works with directories, ZIP files, or single files (.json/.txt)
+questions = """
+PIPELINE: Create an ADF pipeline to copy data from Blob Storage to SQL Database
+TEXT: Explain your pipeline design choices
+"""
+prompts = {
+    "pipeline": "Evaluate pipeline structure, activities, and best practices",
+    "text": "Evaluate clarity and reasoning",
+}
+result = mentor.evaluate_all(
+    answer_path="path/to/submission/",  # or "submission.zip"
+    questions=questions,
+    prompts=prompts,
+)
+print(f"Score: {result['score']}/100")
+print(f"Feedback:\n{result['feedback']}")
+```
+## Package Structure
+```
+ADFmentor/
+├── __init__.py           # Main package entry point
+├── core.py               # ADFMentor class with evaluation methods
+├── models/               # AI model wrappers
+│   ├── __init__.py
+│   ├── model.py          # Abstract base model
+│   └── gemini.py         # Google Gemini implementation
+└── utils/                # Utility functions
+    ├── __init__.py
+    ├── processor.py      # ADF JSON parsing and report generation
+    ├── checker.py        # File discovery helpers
+    ├── extractor.py      # ZIP extraction utilities
+    ├── path_handler.py   # Answer path preparation (prepare_answer_path)
+    └── question_parser.py # Lesson question parser helpers
+```
+## Core Components
+### ADFMentor Class
+The main class provides a single evaluation method:
+- **`evaluate_all(answer_path, questions, prompts)`**: Evaluates pipeline structure and written answers together and returns an overall score and combined feedback
+Notes:
+- `answer_path` can be a directory, ZIP file, or a single submission file (`.json`, `.txt`).
+- `questions` and `prompts` must include `pipeline` and `text` keys. A section is skipped if its question is set to `None`.
+### ADF Processor
+`ADFmentor.utils.processor` provides functions for processing ADF projects:
+#### `parse_adf_json(json_path)`
+Reads and parses a single ADF resource JSON file.
+#### `discover_adf_resources(directory)`
+Scans a directory for ADF resource folders:
+- `pipeline/` — Pipeline definitions
+- `dataset/` — Dataset definitions
+- `linkedService/` — Linked service definitions
+- `trigger/` — Trigger definitions
+- `dataflow/` — Dataflow definitions
+#### `extract_grading_info(resources)`
+Extracts key elements for grading:
+- Pipelines: activities, dependencies, parameters, variables
+- Datasets: type, linked service reference, schema, location
+- Linked Services: type, connection details (sanitized)
+- Triggers: type, schedule, pipeline references
+- Dataflows: sources, sinks, transformations
+#### `generate_grading_report(grading_info)`
+Formats extracted information into a readable text report.
+#### `analyze_adf(adf_path)`
+Convenience function that chains all steps above.
+### AI Models
+#### Gemini Model
+`ADFmentor.models.gemini.Gemini`
+```python
+from ADFmentor.models import Gemini
+model = Gemini(api_key="your-api-key", model_name="gemini-2.0-flash-exp")
+# Evaluate text-based answers
+result = model.evaluate(
+    question="What are ADF linked services?",
+    answer="Linked services are connection strings...",
+    prompt="Evaluate for accuracy and completeness"
+)
+```
+**Response Format:**
+```json
+{
+  "score": 85,
+  "feedback": "Strong implementation with minor issues..."
+}
+```
+## Configuration
+### API Key Setup
+Create a `.env` file in your project root:
+```env
+API_KEY=your_gemini_api_key_here
+```
+Load it in your code:
+```python
+from dotenv import load_dotenv
+import os
+load_dotenv()
+api_key = os.getenv("API_KEY")
+```
+## Detailed Usage Examples
+### 1. Analyze an ADF Project
+```python
+from ADFmentor.utils import analyze_adf
+# Generate a detailed report from an ADF project directory
+report = analyze_adf("path/to/adf-project/")
+print(report)
+```
+**Sample Output:**
+```
+============================================================
+AZURE DATA FACTORY PROJECT REPORT
+============================================================
+PIPELINES:
+  - CopyBlobToSQL
+    Parameters: inputPath, outputTable
+    Activities (3):
+      • LookupSource (type: Lookup)
+      • CopyData (type: Copy)
+        depends on: LookupSource [Succeeded]
+        source: BlobSource
+        sink: SqlSink
+      • StoredProcedure (type: SqlServerStoredProcedure)
+        depends on: CopyData [Succeeded]
+DATASETS:
+  - BlobInput (type: DelimitedText)
+    linked service: AzureBlobStorage
+    location: type: AzureBlobStorageLocation, folder: input
+  - SqlOutput (type: AzureSqlTable)
+    linked service: AzureSqlDatabase
+    table: dbo.SalesData
+LINKED SERVICES:
+  - AzureBlobStorage (type: AzureBlobStorage)
+  - AzureSqlDatabase (type: AzureSqlDatabase)
+TRIGGERS:
+  - DailyTrigger (type: ScheduleTrigger)
+    schedule: every 1 Day
+    pipelines: CopyBlobToSQL
+DATAFLOWS:
+  none
+SUMMARY:
+  - total_pipelines: 1
+  - total_activities: 3
+  - total_datasets: 2
+  - total_linked_services: 2
+  - total_triggers: 1
+  - total_dataflows: 0
+```
+### 2. Complete Evaluation Pipeline
+```python
+from ADFmentor import ADFMentor
+mentor = ADFMentor(api_key="your-api-key")
+# Define questions and prompts for each evaluation type
+questions = {
+    "pipeline": "Create a pipeline to copy data from Blob to SQL with error handling",
+    "text": "Explain your pipeline design choices"
+}
+prompts = {
+    "pipeline": "Evaluate pipeline structure, activities, error handling, and best practices",
+    "text": "Evaluate clarity, justification, and understanding"
+}
+# Evaluate all aspects
+result = mentor.evaluate_all(
+    answer_path="path/to/student/submission/",
+    questions=questions,
+    prompts=prompts
+)
+print(f"Overall Score: {result['score']}/100")
+print(f"Feedback:\n{result['feedback']}")
+```
+### 3. Parse Lesson Questions
+If your lesson content uses codes like `"TEXT001"`, `"PIPELINE002"`, you can parse it into question blocks:
+```python
+from ADFmentor.utils.question_parser import parse_lesson_questions
+lesson_text = """
+"TEXT001"
+1. Explain the purpose of linked services in ADF.
+"PIPELINE002"
+2. Create a pipeline to copy data from Blob Storage to SQL Database.
+"""
+questions = parse_lesson_questions(lesson_text)
+# -> {"text": "1. ...", "pipeline": "2. ..."}
+# Map to the evaluate_all() schema
+questions = {
+    "pipeline": questions["pipeline"],
+    "text": questions["text"],
+}
+```
+### 4. Skipping an Evaluation Section
+To skip a section, set its question to `None` (the key must still exist):
+```python
+questions = {
+    "pipeline": "Create a copy pipeline with parameterized paths",
+    "text": None,
+}
+prompts = {
+    "pipeline": "Evaluate pipeline structure and best practices",
+    "text": "Evaluate clarity, justification, and understanding",
+}
+```
+## ADF Project Structure
+ADFMentor expects submissions to follow the standard ADF project structure:
+```
+adf-project/
+├── pipeline/          # Pipeline JSON definitions
+│   └── CopyPipeline.json
+├── dataset/           # Dataset JSON definitions
+│   ├── BlobInput.json
+│   └── SqlOutput.json
+├── linkedService/     # Linked service definitions
+│   ├── BlobStorage.json
+│   └── SqlDatabase.json
+├── trigger/           # Trigger definitions
+│   └── DailyTrigger.json
+└── dataflow/          # Dataflow definitions (optional)
+```
+Each JSON file follows the standard ADF resource format with `name`, `type`, and `properties` fields.
+## Development
+### Running Tests
+```bash
+# Run integration tests
+python tests/test.py
+```
+### Project Dependencies
+Core:
+- `google-genai>=1.0.0` - Google Gemini API client
+- `python-dotenv>=1.0.0` - Environment variable management
+Optional:
+- `google-cloud-aiplatform>=1.0.0` - For Vertex AI support
+## Requirements
+- Python 3.9 or higher
+- Google Gemini API key (get one at [Google AI Studio](https://makersuite.google.com/app/apikey))
+## Contributing
+Contributions are welcome! Please feel free to submit a Pull Request.
+## License
+MIT License - see LICENSE file for details

adfmentor-0.3.0/ADFMentor.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,20 @@
+LICENSE
+README.md
+pyproject.toml
+ADFMentor.egg-info/PKG-INFO
+ADFMentor.egg-info/SOURCES.txt
+ADFMentor.egg-info/dependency_links.txt
+ADFMentor.egg-info/requires.txt
+ADFMentor.egg-info/top_level.txt
+ADFmentor/__init__.py
+ADFmentor/core.py
+ADFmentor/models/__init__.py
+ADFmentor/models/gemini.py
+ADFmentor/models/model.py
+ADFmentor/utils/__init__.py
+ADFmentor/utils/checker.py
+ADFmentor/utils/extractor.py
+ADFmentor/utils/path_handler.py
+ADFmentor/utils/processor.py
+ADFmentor/utils/question_parser.py
+tests/test.py

adfmentor-0.3.0/ADFMentor.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+

adfmentor-0.3.0/ADFMentor.egg-info/requires.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ google-genai>=1.0.0
2	+ python-dotenv>=1.0.0

adfmentor-0.3.0/ADFMentor.egg-info/top_level.txt ADDED Viewed

@@ -0,0 +1,3 @@
+ADFmentor
+dist
+tests

adfmentor-0.3.0/ADFmentor/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from .core import ADFMentor
+__all__ = ['ADFMentor']

adfmentor-0.3.0/ADFmentor/core.py ADDED Viewed

@@ -0,0 +1,121 @@
+from __future__ import annotations
+from pathlib import Path
+from typing import Any, Dict, Optional
+from .models.gemini import Gemini
+from ADFmentor.utils.processor import analyze_adf
+from ADFmentor.utils.checker import get_file_by_type, is_adf_project
+from .utils.path_handler import prepare_answer_path
+from .utils.question_parser import parse_lesson_questions
+class ADFMentor:
+    """Evaluate a student's ADF submission with an AI model.
+    The pipeline section is summarised with ``analyze_adf`` and the written
+    section is read from a ``.txt`` file; each is scored through
+    ``self.model.evaluate`` and the scores are averaged by ``evaluate_all``.
+    """
+    def __init__(self, api_key: str, model_name: str = "gemini-2.0-flash-exp", model: Optional[Gemini] = None):
+        """Store the evaluator model.
+        Args:
+            api_key: Key passed to ``Gemini`` when no ``model`` is supplied.
+            model_name: Model name passed to ``Gemini`` when no ``model`` is supplied.
+            model: Pre-built evaluator; when given, ``api_key`` and ``model_name`` are not used.
+        """
+        if model is None:
+            model = Gemini(api_key=api_key, model_name=model_name)
+        self.model = model
+    def _evaluate_pipeline_from_path(self, working_path: str, question: Optional[str], prompt: str) -> Optional[
+        Dict[str, Any]]:
+        """Score the pipeline section of a submission.
+        Args:
+            working_path: Path to the submission. A ``.json`` file or a directory is
+                evaluated; anything else yields the score-0 result.
+            question: Pipeline question; ``None`` skips this section.
+            prompt: Evaluation instructions forwarded to the model.
+        Returns:
+            ``None`` when ``question`` is ``None``; a dict with ``score`` 0 and an
+            explanatory ``feedback`` when no usable JSON input is found; otherwise the
+            result of ``self.model.evaluate`` on the ``analyze_adf`` output.
+        """
+        if question is None:
+            return None
+        path = Path(working_path)
+        if path.is_file() and path.suffix.lower() == ".json":
+            adf_path = path
+        elif path.is_dir():
+            # A directory recognised as an ADF project skips the JSON presence check below.
+            if is_adf_project(str(path)):
+                adf_path = path
+            else:
+                # The lookup result is only used as a presence check; the directory
+                # itself is what gets analysed.
+                json_file = get_file_by_type(working_path, ".json")
+                if not json_file:
+                    return {
+                        "score": 0,
+                        "feedback": (
+                            "Unable to evaluate submission.\n\n"
+                            "The assignment includes ADF pipeline questions, but no ADF project files (JSON) were found. "
+                            "Please ensure your submission includes all required components and resubmit."
+                        ),
+                    }
+                # Use the whole directory so analyze_adf can rglob all .json files
+                adf_path = path
+        else:
+            # Neither a .json file nor a directory (e.g. a lone .txt file or a missing path).
+            return {
+                "score": 0,
+                "feedback": (
+                    "Unable to evaluate submission.\n\n"
+                    "The assignment includes ADF pipeline questions, but no ADF project files were found. "
+                    "Please ensure your submission includes all required components and resubmit."
+                ),
+            }
+        return self.model.evaluate(
+            question=question,
+            answer=analyze_adf(str(adf_path)),
+            prompt=prompt,
+        )
+    def _evaluate_write_from_path(self, working_path: str, question: Optional[str], prompt: str) -> Optional[
+        Dict[str, Any]]:
+        """Score the written (text) section of a submission.
+        Args:
+            working_path: Path to the submission; either a ``.txt`` file or a location
+                that ``get_file_by_type`` can search for one.
+            question: Written question; ``None`` skips this section.
+            prompt: Evaluation instructions forwarded to the model.
+        Returns:
+            ``None`` when ``question`` is ``None``; a dict with ``score`` 0 and an
+            explanatory ``feedback`` when no ``.txt`` file is found; otherwise the
+            result of ``self.model.evaluate`` on the file's UTF-8 text.
+        """
+        if question is None:
+            return None
+        path = Path(working_path)
+        if path.is_file() and path.suffix.lower() == ".txt":
+            txt_path = path
+        else:
+            # NOTE(review): this branch also runs when working_path is a non-.txt file
+            # (e.g. a single .json submission); whether get_file_by_type copes with a
+            # file path is not visible here — confirm.
+            txt_file = get_file_by_type(working_path, ".txt")
+            if not txt_file:
+                return {
+                    "score": 0,
+                    "feedback": (
+                        "Unable to evaluate submission.\n\n"
+                        "The assignment includes written type questions, but no written type response (TXT file) was found. "
+                        "Please ensure your submission includes all required components and resubmit."
+                    ),
+                }
+            # Presumably txt_file is a name relative to working_path — TODO confirm
+            # against get_file_by_type.
+            txt_path = path / txt_file
+        text_answer = txt_path.read_text(encoding="utf-8")
+        return self.model.evaluate(
+            question=question,
+            answer=text_answer,
+            prompt=prompt,
+        )
+    def evaluate_all(self, answer_path: str, questions: str, prompts: Dict[str, str]) -> Dict[str, Any]:
+        """Evaluate the pipeline and written sections and combine the results.
+        Args:
+            answer_path: Submission location; passed through ``prepare_answer_path``
+                before use.
+            questions: Lesson question input handed to ``parse_lesson_questions``; the
+                parsed result is read with ``.get("pipeline")`` and ``.get("text")``.
+            prompts: Evaluation instructions looked up under ``"pipeline"`` and ``"text"``.
+        Returns:
+            Dict with ``'score'`` (mean of the evaluated sections' scores rounded to two
+            decimals, or ``0`` when no section ran) and ``'feedback'`` (per-section
+            reports joined by newlines, or ``"No evaluations completed."``).
+        """
+        working_path = prepare_answer_path(answer_path)
+        # NOTE(review): the annotation says ``questions`` is a str, yet the parsed value
+        # is used like a mapping below; which input types parse_lesson_questions accepts
+        # is not visible here — confirm.
+        questions = parse_lesson_questions(questions)
+        # NOTE(review): prompts.get() returns None for a missing key and that None is
+        # forwarded as ``prompt`` — confirm the model wrapper tolerates it.
+        results = {
+            "pipeline": self._evaluate_pipeline_from_path(working_path, questions.get("pipeline"), prompts.get("pipeline")),
+            "text": self._evaluate_write_from_path(working_path, questions.get("text"), prompts.get("text")),
+        }
+        scores = []
+        feedback_parts = []
+        for key, value in results.items():
+            # A None result means the section was skipped (its question was None).
+            if value is not None:
+                scores.append(value['score'])
+                feedback_parts.append(
+                    f"{'=' * 70}\n"
+                    f"{key.upper()} EVALUATION\n"
+                    f"{'=' * 70}\n"
+                    f"Score: {value['score']}/100\n\n"
+                    f"{value['feedback']}\n"
+                )
+        # Unweighted mean over the sections that actually ran.
+        avg_score = sum(scores) / len(scores) if scores else 0
+        summary = {
+            'score': round(avg_score, 2),
+            'feedback': '\n'.join(feedback_parts) if feedback_parts else "No evaluations completed."
+        }
+        return summary

adfmentor-0.3.0/ADFmentor/models/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+"""Models package for ADFMentor.
+This package contains model wrappers for AI evaluation services.
+"""
+from .model import Model
+from .gemini import Gemini
+__all__ = ["Model", "Gemini"]

adfmentor-0.3.0/ADFmentor/models/gemini.py ADDED Viewed

@@ -0,0 +1,80 @@
+import json
+from pathlib import Path
+from typing import Any, Dict, Union
+from google import genai
+from google.genai import types
+from .model import Model, build_content
+class Gemini(Model):
+    """Google Gemini model wrapper for ADFMentor evaluations.
+    Uses the Google Gemini API to evaluate student submissions with
+    structured JSON responses containing scores and feedback.
+    Attributes:
+        client: Google Gemini API client
+        model_name: Name of the Gemini model to use
+        response_schema: ``GenerateContentConfig`` that requests an
+            ``application/json`` response with required ``score`` and
+            ``feedback`` fields
+    """
+    def __init__(self, api_key: str, model_name: str = "gemini-2.0-flash-exp"):
+        """Initialize the Gemini model.
+        Args:
+            api_key: Your Google Gemini API key
+            model_name: Model to use (default: gemini-2.0-flash-exp)
+        """
+        super().__init__()
+        self.client = genai.Client(api_key=api_key)
+        self.model_name = model_name
+        # Despite the attribute name, this is the full generation config (MIME type
+        # plus JSON schema); it is passed as ``config=`` in evaluate().
+        self.response_schema = types.GenerateContentConfig(
+            response_mime_type="application/json",
+            response_schema={
+                "type": "object",
+                "properties": {
+                    "score": {
+                        "type": "number",
+                        "description": "The numerical score for the evaluation"
+                    },
+                    "feedback": {
+                        "type": "string",
+                        "description": "Detailed feedback explaining the score"
+                    }
+                },
+                "required": ["score", "feedback"]
+            }
+        )
+    def evaluate(self, question: str, answer: str, prompt: str) -> Dict[str, Any]:
+        """Evaluate a text-based answer.
+        Args:
+            question: The assignment question
+            answer: The student's answer
+            prompt: Evaluation criteria and instructions
+        Returns:
+            The decoded JSON response containing 'score' (declared as a number
+            in the schema; its range is not validated here) and 'feedback'
+        Raises:
+            ValueError: If the model doesn't return valid JSON or missing fields
+        """
+        response = self.client.models.generate_content(
+            model=self.model_name,
+            contents=build_content(question, answer, prompt),
+            config=self.response_schema,
+        )
+        # A missing response text is treated as "" so that json.loads fails and the
+        # invalid-JSON ValueError below is raised.
+        text = (response.text or "").strip()
+        try:
+            result = json.loads(text)
+            # NOTE(review): assumes the decoded value is a JSON object; a top-level
+            # list/number is not rejected explicitly — confirm the schema guarantees it.
+            if "score" not in result or "feedback" not in result:
+                raise ValueError(f"Missing required fields (score, feedback) in response: {result}")
+            return result
+        except json.JSONDecodeError as e:
+            raise ValueError(f"Model did not return valid JSON.\nRaw output:\n{text}") from e

adfmentor-0.3.0/ADFmentor/models/model.py ADDED Viewed

@@ -0,0 +1,63 @@
+"""Abstract base model for ADFMentor evaluators."""
+from abc import ABC, abstractmethod
+from typing import Any, Dict
+def build_content(question: str, answer: str, prompt: str) -> str:
+    """Build the evaluation prompt for text-based evaluations.
+    Each argument is stripped of surrounding whitespace and inserted under its
+    own heading, followed by instructions to reply with JSON only.
+    Args:
+        question: The assignment question
+        answer: The student's answer
+        prompt: Evaluation instructions
+    Returns:
+        Formatted prompt string
+    Raises:
+        AttributeError: If any argument is ``None`` (``.strip()`` is called on each)
+    """
+    # The literal's indentation is part of the prompt text; only the outer
+    # leading/trailing whitespace is removed by the final .strip().
+    return f"""
+            Instruction:
+            {prompt.strip()}
+            Question:
+            {question.strip()}
+            Answer:
+            {answer.strip()}
+            Return ONLY valid JSON in the following format.
+            DO NOT add explanations, markdown, or extra text.
+            DO NOT wrap in ```.
+            JSON schema:
+            {{
+              "score": number (0-100),
+              "feedback": string
+            }}
+    """.strip()
+class Model(ABC):
+    """Abstract base class for AI model evaluators.
+    All model implementations must inherit from this class and implement
+    the evaluate method.
+    """
+    def __init__(self):
+        """Initialize the model (no state is set up by the base class)."""
+        pass
+    @abstractmethod
+    def evaluate(self, question: str, answer: str, prompt: str) -> Dict[str, Any]:
+        """Evaluate an answer to a question using the AI model.
+        Args:
+            question: The question or assignment prompt
+            answer: The student's answer or solution
+            prompt: Evaluation criteria and instructions for the model
+        Returns:
+            Dictionary with 'score' (0-100) and 'feedback' (string); callers
+            index the result by exactly these two keys
+        """
+        pass

adfmentor-0.3.0/ADFmentor/utils/__init__.py ADDED Viewed

@@ -0,0 +1,20 @@
+"""Utilities package for ADFMentor.
+This package contains utility functions for processing Azure Data Factory
+files and extracting metadata.
+"""
+from .processor import analyze_adf, parse_adf_json, extract_grading_info, generate_grading_report, discover_adf_resources
+from .checker import get_file_by_type, is_adf_project
+from .extractor import extract_zip_to_temp
+__all__ = [
+    "analyze_adf",
+    "parse_adf_json",
+    "discover_adf_resources",
+    "extract_grading_info",
+    "generate_grading_report",
+    "get_file_by_type",
+    "is_adf_project",
+    "extract_zip_to_temp",
+]