langchain-google-classroom 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain_google_classroom-0.1.0/.github/ISSUE_TEMPLATE/bug_report.md +25 -0
- langchain_google_classroom-0.1.0/.github/ISSUE_TEMPLATE/feature_request.md +18 -0
- langchain_google_classroom-0.1.0/.github/workflows/ci.yml +47 -0
- langchain_google_classroom-0.1.0/.gitignore +37 -0
- langchain_google_classroom-0.1.0/CHANGELOG.md +37 -0
- langchain_google_classroom-0.1.0/CONTRIBUTING.md +77 -0
- langchain_google_classroom-0.1.0/LICENSE +21 -0
- langchain_google_classroom-0.1.0/PKG-INFO +256 -0
- langchain_google_classroom-0.1.0/README.md +208 -0
- langchain_google_classroom-0.1.0/examples/basic_usage.py +48 -0
- langchain_google_classroom-0.1.0/google_classroom_langchain_loader_architecture.md +469 -0
- langchain_google_classroom-0.1.0/langchain_google_classroom/__init__.py +19 -0
- langchain_google_classroom-0.1.0/langchain_google_classroom/_utilities.py +241 -0
- langchain_google_classroom-0.1.0/langchain_google_classroom/classroom_api.py +176 -0
- langchain_google_classroom-0.1.0/langchain_google_classroom/document_builder.py +231 -0
- langchain_google_classroom-0.1.0/langchain_google_classroom/drive_resolver.py +232 -0
- langchain_google_classroom-0.1.0/langchain_google_classroom/loader.py +415 -0
- langchain_google_classroom-0.1.0/langchain_google_classroom/normalizer.py +44 -0
- langchain_google_classroom-0.1.0/langchain_google_classroom/parsers/__init__.py +62 -0
- langchain_google_classroom-0.1.0/langchain_google_classroom/parsers/docx_parser.py +58 -0
- langchain_google_classroom-0.1.0/langchain_google_classroom/parsers/image_parser.py +154 -0
- langchain_google_classroom-0.1.0/langchain_google_classroom/parsers/pdf_parser.py +197 -0
- langchain_google_classroom-0.1.0/langchain_google_classroom/parsers/text_parser.py +37 -0
- langchain_google_classroom-0.1.0/langchain_google_classroom/py.typed +0 -0
- langchain_google_classroom-0.1.0/langchain_google_classroom_complete_plan.md +397 -0
- langchain_google_classroom-0.1.0/pyproject.toml +100 -0
- langchain_google_classroom-0.1.0/tests/__init__.py +0 -0
- langchain_google_classroom-0.1.0/tests/unit/__init__.py +0 -0
- langchain_google_classroom-0.1.0/tests/unit/test_classroom_api.py +156 -0
- langchain_google_classroom-0.1.0/tests/unit/test_document_builder.py +186 -0
- langchain_google_classroom-0.1.0/tests/unit/test_drive_resolver.py +250 -0
- langchain_google_classroom-0.1.0/tests/unit/test_loader.py +679 -0
- langchain_google_classroom-0.1.0/tests/unit/test_normalizer.py +39 -0
- langchain_google_classroom-0.1.0/tests/unit/test_parsers.py +426 -0
- langchain_google_classroom-0.1.0/tests/unit/test_utilities.py +301 -0
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Bug Report
|
|
3
|
+
about: Report a bug in langchain-google-classroom
|
|
4
|
+
title: "[BUG] "
|
|
5
|
+
labels: bug
|
|
6
|
+
assignees: ""
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Description
|
|
10
|
+
A clear description of the bug.
|
|
11
|
+
|
|
12
|
+
## Steps to Reproduce
|
|
13
|
+
1. ...
|
|
14
|
+
2. ...
|
|
15
|
+
|
|
16
|
+
## Expected Behavior
|
|
17
|
+
What you expected to happen.
|
|
18
|
+
|
|
19
|
+
## Actual Behavior
|
|
20
|
+
What actually happened. Include error traceback if applicable.
|
|
21
|
+
|
|
22
|
+
## Environment
|
|
23
|
+
- Python version:
|
|
24
|
+
- langchain-google-classroom version:
|
|
25
|
+
- OS:
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Feature Request
|
|
3
|
+
about: Suggest a new feature
|
|
4
|
+
title: "[FEATURE] "
|
|
5
|
+
labels: enhancement
|
|
6
|
+
assignees: ""
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Description
|
|
10
|
+
A clear description of the feature you'd like.
|
|
11
|
+
|
|
12
|
+
## Use Case
|
|
13
|
+
Why this feature would be useful. What problem does it solve?
|
|
14
|
+
|
|
15
|
+
## Proposed API
|
|
16
|
+
```python
|
|
17
|
+
# How you'd expect to use this feature
|
|
18
|
+
```
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
permissions:
|
|
10
|
+
contents: read
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
lint:
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
strategy:
|
|
16
|
+
matrix:
|
|
17
|
+
python-version: ["3.12"]
|
|
18
|
+
steps:
|
|
19
|
+
- uses: actions/checkout@v4
|
|
20
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
21
|
+
uses: actions/setup-python@v5
|
|
22
|
+
with:
|
|
23
|
+
python-version: ${{ matrix.python-version }}
|
|
24
|
+
- name: Install dependencies
|
|
25
|
+
run: |
|
|
26
|
+
python -m pip install --upgrade pip
|
|
27
|
+
pip install -e ".[lint]"
|
|
28
|
+
- name: Lint with ruff
|
|
29
|
+
run: ruff check langchain_google_classroom/ tests/
|
|
30
|
+
|
|
31
|
+
test:
|
|
32
|
+
runs-on: ubuntu-latest
|
|
33
|
+
strategy:
|
|
34
|
+
matrix:
|
|
35
|
+
python-version: ["3.9", "3.10", "3.11", "3.12"]
|
|
36
|
+
steps:
|
|
37
|
+
- uses: actions/checkout@v4
|
|
38
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
39
|
+
uses: actions/setup-python@v5
|
|
40
|
+
with:
|
|
41
|
+
python-version: ${{ matrix.python-version }}
|
|
42
|
+
- name: Install dependencies
|
|
43
|
+
run: |
|
|
44
|
+
python -m pip install --upgrade pip
|
|
45
|
+
pip install -e ".[dev]"
|
|
46
|
+
- name: Run tests
|
|
47
|
+
run: pytest tests/unit/ -v --tb=short
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# Distribution / packaging
|
|
7
|
+
dist/
|
|
8
|
+
build/
|
|
9
|
+
*.egg-info/
|
|
10
|
+
*.egg
|
|
11
|
+
|
|
12
|
+
# Virtual environments
|
|
13
|
+
.venv/
|
|
14
|
+
venv/
|
|
15
|
+
env/
|
|
16
|
+
|
|
17
|
+
# IDE
|
|
18
|
+
.idea/
|
|
19
|
+
.vscode/
|
|
20
|
+
*.swp
|
|
21
|
+
*.swo
|
|
22
|
+
|
|
23
|
+
# Testing
|
|
24
|
+
.pytest_cache/
|
|
25
|
+
.coverage
|
|
26
|
+
htmlcov/
|
|
27
|
+
.mypy_cache/
|
|
28
|
+
|
|
29
|
+
# OS
|
|
30
|
+
.DS_Store
|
|
31
|
+
Thumbs.db
|
|
32
|
+
|
|
33
|
+
# Credentials (never commit)
|
|
34
|
+
token.json
|
|
35
|
+
credentials.json
|
|
36
|
+
service_account.json
|
|
37
|
+
*.json.bak
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and
|
|
6
|
+
this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [0.1.0] — 2026-03-13
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
|
|
12
|
+
- **`GoogleClassroomLoader`** — LangChain `BaseLoader` for Google Classroom.
|
|
13
|
+
- Loads assignments (courseWork), announcements, and course materials.
|
|
14
|
+
- Selective loading via `load_assignments`, `load_announcements`, `load_materials` flags.
|
|
15
|
+
- Multi-course support with `course_ids` filter.
|
|
16
|
+
- **Drive attachment resolution** via `DriveAttachmentResolver`.
|
|
17
|
+
- Google-native files (Docs, Slides, Sheets) exported as plain text / CSV.
|
|
18
|
+
- Binary files (PDF, DOCX, etc.) downloaded via streaming.
|
|
19
|
+
- Controlled by `load_attachments` and `parse_attachments` flags.
|
|
20
|
+
- **File parser layer** using LangChain's `BaseBlobParser` + `Blob` interface.
|
|
21
|
+
- `PDFParser` (pypdf), `DocxParser` (python-docx), `TextParser` (built-in),
|
|
22
|
+
`ImageParser` (vision LLM).
|
|
23
|
+
- MIME-type registry with `get_parser()` factory function.
|
|
24
|
+
- `file_parser_cls` param for user-pluggable parsers (e.g. `PyMuPDFParser`).
|
|
25
|
+
- **Vision LLM image description** for images embedded in PDFs and standalone
|
|
26
|
+
image attachments. Pass a `vision_model` (e.g. Gemini, GPT-4V) to get
|
|
27
|
+
automatic image descriptions appended to page text.
|
|
28
|
+
- **Retry / backoff** via `execute_with_retry()` on all Google API calls.
|
|
29
|
+
- Exponential backoff with jitter on HTTP 429, 500, 503.
|
|
30
|
+
- **Authentication** support for service accounts, cached OAuth tokens, and interactive
|
|
31
|
+
OAuth flow.
|
|
32
|
+
- **Text normalizer** — Unicode NFC normalization, line-ending cleanup, null-byte removal.
|
|
33
|
+
- **`Document` metadata** — rich metadata including course info, timestamps, due dates,
|
|
34
|
+
links, and parent item references for attachments.
|
|
35
|
+
- **Project packaging** — `pyproject.toml` with hatchling, optional dependency groups
|
|
36
|
+
(`[parsers]`, `[test]`, `[lint]`, `[typing]`, `[dev]`), PEP 561 `py.typed` marker.
|
|
37
|
+
- **92 unit tests** covering all modules with 100% pass rate.
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# Contributing to langchain-google-classroom
|
|
2
|
+
|
|
3
|
+
Thank you for your interest in contributing! This guide will help you get started.
|
|
4
|
+
|
|
5
|
+
## Development Setup
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
# Clone the repository
|
|
9
|
+
git clone https://github.com/ayanokojix21/langchain-google-classroom.git
|
|
10
|
+
cd langchain-google-classroom
|
|
11
|
+
|
|
12
|
+
# Create virtual environment
|
|
13
|
+
python -m venv .venv
|
|
14
|
+
source .venv/bin/activate # Linux/macOS
|
|
15
|
+
# .venv\Scripts\activate # Windows
|
|
16
|
+
|
|
17
|
+
# Install in development mode
|
|
18
|
+
pip install -e ".[dev]"
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Running Tests
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
# Run all unit tests
|
|
25
|
+
pytest tests/unit/ -v
|
|
26
|
+
|
|
27
|
+
# Run a specific test file
|
|
28
|
+
pytest tests/unit/test_parsers.py -v
|
|
29
|
+
|
|
30
|
+
# Run with coverage
|
|
31
|
+
pytest tests/unit/ --cov=langchain_google_classroom --cov-report=term-missing
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Linting
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
# Check for lint errors
|
|
38
|
+
ruff check langchain_google_classroom/ tests/
|
|
39
|
+
|
|
40
|
+
# Auto-fix issues
|
|
41
|
+
ruff check langchain_google_classroom/ tests/ --fix
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Code Style
|
|
45
|
+
|
|
46
|
+
This project follows LangChain coding conventions:
|
|
47
|
+
|
|
48
|
+
- **Type annotations** on all functions and methods
|
|
49
|
+
- **Docstrings** in Google/NumPy style on all public functions
|
|
50
|
+
- **`from __future__ import annotations`** at the top of every module
|
|
51
|
+
- **`guard_import`** for optional dependencies (pypdf, python-docx)
|
|
52
|
+
- **`BaseBlobParser` + `Blob`** interface for all file parsers
|
|
53
|
+
- **`BaseLoader.lazy_load()`** as the main entry point
|
|
54
|
+
|
|
55
|
+
## Adding a New Parser
|
|
56
|
+
|
|
57
|
+
1. Create `langchain_google_classroom/parsers/your_parser.py`
|
|
58
|
+
2. Implement `BaseBlobParser.lazy_parse(blob)` → `Iterator[Document]`
|
|
59
|
+
3. Add the MIME type mapping in `parsers/__init__.py`
|
|
60
|
+
4. Add tests in `tests/unit/test_parsers.py`
|
|
61
|
+
5. Run `pytest` and `ruff check`
|
|
62
|
+
|
|
63
|
+
## Pull Request Process
|
|
64
|
+
|
|
65
|
+
1. Fork the repository
|
|
66
|
+
2. Create a feature branch (`git checkout -b feature/my-feature`)
|
|
67
|
+
3. Make your changes with tests
|
|
68
|
+
4. Run `pytest tests/unit/ -v` and `ruff check`
|
|
69
|
+
5. Commit with a descriptive message
|
|
70
|
+
6. Push and open a Pull Request
|
|
71
|
+
|
|
72
|
+
## Reporting Issues
|
|
73
|
+
|
|
74
|
+
Use [GitHub Issues](https://github.com/ayanokojix21/langchain-google-classroom/issues) with:
|
|
75
|
+
|
|
76
|
+
- **Bug reports**: Include Python version, error traceback, and steps to reproduce
|
|
77
|
+
- **Feature requests**: Describe the use case and proposed API
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Nishchal Chandel
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: langchain-google-classroom
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: An integration package connecting Google Classroom and LangChain
|
|
5
|
+
Project-URL: Homepage, https://github.com/ayanokojix21/langchain-google-classroom
|
|
6
|
+
Project-URL: Source, https://github.com/ayanokojix21/langchain-google-classroom
|
|
7
|
+
Project-URL: Documentation, https://github.com/ayanokojix21/langchain-google-classroom#readme
|
|
8
|
+
Author: Nishchal Chandel
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: document-loader,education,google-classroom,langchain,rag
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Education
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
22
|
+
Requires-Python: >=3.9
|
|
23
|
+
Requires-Dist: google-api-python-client<3.0.0,>=2.100.0
|
|
24
|
+
Requires-Dist: google-auth-httplib2<1.0.0,>=0.2.0
|
|
25
|
+
Requires-Dist: google-auth-oauthlib<2.0.0,>=1.2.0
|
|
26
|
+
Requires-Dist: google-auth<3.0.0,>=2.25.0
|
|
27
|
+
Requires-Dist: langchain-core<1.0.0,>=0.3.0
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: mypy<2.0.0,>=1.10.0; extra == 'dev'
|
|
30
|
+
Requires-Dist: pypdf<5.0.0,>=4.0.0; extra == 'dev'
|
|
31
|
+
Requires-Dist: pytest-mock<4.0.0,>=3.10.0; extra == 'dev'
|
|
32
|
+
Requires-Dist: pytest<10.0.0,>=7.3.0; extra == 'dev'
|
|
33
|
+
Requires-Dist: python-docx<2.0.0,>=1.0.0; extra == 'dev'
|
|
34
|
+
Requires-Dist: ruff<1,>=0.5.0; extra == 'dev'
|
|
35
|
+
Provides-Extra: lint
|
|
36
|
+
Requires-Dist: ruff<1,>=0.5.0; extra == 'lint'
|
|
37
|
+
Provides-Extra: parsers
|
|
38
|
+
Requires-Dist: pypdf<5.0.0,>=4.0.0; extra == 'parsers'
|
|
39
|
+
Requires-Dist: python-docx<2.0.0,>=1.0.0; extra == 'parsers'
|
|
40
|
+
Provides-Extra: test
|
|
41
|
+
Requires-Dist: pytest-asyncio<2.0.0,>=0.21.1; extra == 'test'
|
|
42
|
+
Requires-Dist: pytest-mock<4.0.0,>=3.10.0; extra == 'test'
|
|
43
|
+
Requires-Dist: pytest-socket<1.0.0,>=0.7.0; extra == 'test'
|
|
44
|
+
Requires-Dist: pytest<10.0.0,>=7.3.0; extra == 'test'
|
|
45
|
+
Provides-Extra: typing
|
|
46
|
+
Requires-Dist: mypy<2.0.0,>=1.10.0; extra == 'typing'
|
|
47
|
+
Description-Content-Type: text/markdown
|
|
48
|
+
|
|
49
|
+
# langchain-google-classroom
|
|
50
|
+
|
|
51
|
+
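[![CI](https://github.com/ayanokojix21/langchain-google-classroom/actions/workflows/ci.yml/badge.svg)](https://github.com/ayanokojix21/langchain-google-classroom/actions/workflows/ci.yml)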
|
|
52
|
+
[![PyPI version](https://img.shields.io/pypi/v/langchain-google-classroom)](https://pypi.org/project/langchain-google-classroom/)
|
|
53
|
+
[![Python versions](https://img.shields.io/pypi/pyversions/langchain-google-classroom)](https://pypi.org/project/langchain-google-classroom/)
|
|
54
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
|
55
|
+
|
|
56
|
+
A **LangChain** integration package that loads Google Classroom content — assignments, announcements, course materials, and Drive attachments — as `Document` objects for RAG pipelines, semantic search, AI teaching assistants, and course chatbots.
|
|
57
|
+
|
|
58
|
+
## ✨ Features
|
|
59
|
+
|
|
60
|
+
- **Full Classroom coverage** — assignments, announcements, and course materials
|
|
61
|
+
- **Drive attachments** — auto-download and parse PDF, DOCX, text, CSV, HTML files
|
|
62
|
+
- **Vision LLM image description** — embedded PDF images described by Gemini/GPT-4V
|
|
63
|
+
- **Pluggable parsers** — bring your own `BaseBlobParser` (PyMuPDF, Unstructured, etc.)
|
|
64
|
+
- **Retry/backoff** — exponential backoff with jitter on rate-limited API calls
|
|
65
|
+
- **Flexible auth** — service accounts, OAuth, cached tokens, or pre-built credentials
|
|
66
|
+
- **Rich metadata** — course info, timestamps, due dates, links on every Document
|
|
67
|
+
- **Lazy loading** — memory-efficient streaming via `lazy_load()`
|
|
68
|
+
|
|
69
|
+
## 📦 Installation
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
pip install langchain-google-classroom
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
With file attachment parsing (PDF, DOCX):
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
pip install "langchain-google-classroom[parsers]"
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## Quickstart
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
from langchain_google_classroom import GoogleClassroomLoader
|
|
85
|
+
|
|
86
|
+
# Load all accessible courses
|
|
87
|
+
loader = GoogleClassroomLoader()
|
|
88
|
+
docs = loader.load()
|
|
89
|
+
|
|
90
|
+
for doc in docs:
|
|
91
|
+
    print(doc.metadata["content_type"], "—", doc.metadata["title"])
|
|
92
|
+
    print(doc.page_content[:200])
|
|
93
|
+
    print()
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## Authentication
|
|
97
|
+
|
|
98
|
+
### Service Account (recommended for production)
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
loader = GoogleClassroomLoader(
|
|
102
|
+
service_account_file="service_account.json",
|
|
103
|
+
)
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
### OAuth User Credentials
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
loader = GoogleClassroomLoader(
|
|
110
|
+
client_secrets_file="credentials.json",
|
|
111
|
+
token_file="token.json",
|
|
112
|
+
)
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### Pre-built Credentials
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
from google.oauth2 import service_account
|
|
119
|
+
|
|
120
|
+
creds = service_account.Credentials.from_service_account_file(
|
|
121
|
+
"service_account.json",
|
|
122
|
+
scopes=["https://www.googleapis.com/auth/classroom.courses.readonly"],
|
|
123
|
+
)
|
|
124
|
+
loader = GoogleClassroomLoader(credentials=creds)
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
## Attachments & File Parsing
|
|
128
|
+
|
|
129
|
+
```python
|
|
130
|
+
loader = GoogleClassroomLoader(
|
|
131
|
+
course_ids=["123456789"],
|
|
132
|
+
load_attachments=True, # Download Drive files
|
|
133
|
+
parse_attachments=True, # Parse with BaseBlobParser
|
|
134
|
+
)
|
|
135
|
+
docs = loader.load()
|
|
136
|
+
# Yields: assignment docs + parsed PDF/DOCX/text attachment docs
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### Custom Parser
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
from langchain_community.document_loaders.parsers.pdf import PyMuPDFParser
|
|
143
|
+
|
|
144
|
+
loader = GoogleClassroomLoader(
|
|
145
|
+
course_ids=["123456789"],
|
|
146
|
+
file_parser_cls=PyMuPDFParser,
|
|
147
|
+
)
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
## 🖼️ Vision LLM — Image Description
|
|
151
|
+
|
|
152
|
+
Extract and describe images embedded in PDFs using any vision-capable LLM:
|
|
153
|
+
|
|
154
|
+
```python
|
|
155
|
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
|
156
|
+
|
|
157
|
+
loader = GoogleClassroomLoader(
|
|
158
|
+
course_ids=["123456789"],
|
|
159
|
+
load_attachments=True,
|
|
160
|
+
vision_model=ChatGoogleGenerativeAI(model="gemini-2.0-flash"),
|
|
161
|
+
)
|
|
162
|
+
docs = loader.load()
|
|
163
|
+
# PDF pages now include: "[Image: chart.png]\nA bar chart showing student grades..."
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
## 🎯 Selective Loading
|
|
167
|
+
|
|
168
|
+
```python
|
|
169
|
+
loader = GoogleClassroomLoader(
|
|
170
|
+
course_ids=["123456789"],
|
|
171
|
+
load_assignments=True,
|
|
172
|
+
load_announcements=False,
|
|
173
|
+
load_materials=False,
|
|
174
|
+
load_attachments=False,
|
|
175
|
+
)
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
## Document Structure
|
|
179
|
+
|
|
180
|
+
Each document includes rich metadata:
|
|
181
|
+
|
|
182
|
+
```python
|
|
183
|
+
Document(
|
|
184
|
+
page_content="Assignment: Homework 3\n\nComplete exercises 1-5...",
|
|
185
|
+
metadata={
|
|
186
|
+
"source": "google_classroom",
|
|
187
|
+
"course_id": "12345",
|
|
188
|
+
"course_name": "Machine Learning",
|
|
189
|
+
"content_type": "assignment", # or "announcement", "material", "assignment_attachment"
|
|
190
|
+
"title": "Homework 3",
|
|
191
|
+
"item_id": "67890",
|
|
192
|
+
"created_time": "2024-01-15T10:00:00Z",
|
|
193
|
+
"updated_time": "2024-01-15T10:00:00Z",
|
|
194
|
+
"due_date": "2024-01-22T23:59:00", # assignments only
|
|
195
|
+
"max_points": 100.0, # assignments only
|
|
196
|
+
"alternate_link": "https://classroom.google.com/...",
|
|
197
|
+
}
|
|
198
|
+
)
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
## ⚙️ Configuration Reference
|
|
202
|
+
|
|
203
|
+
| Parameter | Type | Default | Description |
|
|
204
|
+
|-----------|------|---------|-------------|
|
|
205
|
+
| `course_ids` | `list[str]` | `None` | Specific course IDs (`None` = all accessible) |
|
|
206
|
+
| `load_assignments` | `bool` | `True` | Load courseWork items |
|
|
207
|
+
| `load_announcements` | `bool` | `True` | Load announcements |
|
|
208
|
+
| `load_materials` | `bool` | `True` | Load courseWorkMaterials |
|
|
209
|
+
| `load_attachments` | `bool` | `True` | Download and process Drive attachments |
|
|
210
|
+
| `parse_attachments` | `bool` | `True` | Parse files with BaseBlobParser |
|
|
211
|
+
| `load_images` | `bool` | `False` | Process image MIME types |
|
|
212
|
+
| `vision_model` | `BaseChatModel` | `None` | Vision LLM for image description |
|
|
213
|
+
| `image_prompt` | `str` | `None` | Custom prompt for vision model |
|
|
214
|
+
| `file_parser_cls` | `type[BaseBlobParser]` | `None` | Custom parser for all attachments |
|
|
215
|
+
| `file_parser_kwargs` | `dict` | `None` | kwargs for custom parser |
|
|
216
|
+
| `credentials` | `Credentials` | `None` | Pre-built Google credentials |
|
|
217
|
+
| `service_account_file` | `str` | `None` | Service account key JSON path |
|
|
218
|
+
| `token_file` | `str` | `None` | Cached OAuth token path |
|
|
219
|
+
| `client_secrets_file` | `str` | `None` | OAuth client secrets path |
|
|
220
|
+
| `scopes` | `list[str]` | Read-only | API scopes to request |
|
|
221
|
+
|
|
222
|
+
## 🏗️ Architecture
|
|
223
|
+
|
|
224
|
+
```
|
|
225
|
+
GoogleClassroomLoader (BaseLoader)
|
|
226
|
+
├── _utilities.py — auth, retry/backoff, guard_import
|
|
227
|
+
├── classroom_api.py — paginated Classroom API fetcher
|
|
228
|
+
├── document_builder.py — raw API → LangChain Document
|
|
229
|
+
├── drive_resolver.py — Drive download/export
|
|
230
|
+
├── normalizer.py — text cleanup (Unicode NFC, whitespace)
|
|
231
|
+
└── parsers/
|
|
232
|
+
    ├── __init__.py — MIME registry + get_parser()
|
|
233
|
+
    ├── pdf_parser.py — pypdf + vision LLM
|
|
234
|
+
    ├── docx_parser.py — python-docx
|
|
235
|
+
    ├── text_parser.py — built-in UTF-8
|
|
236
|
+
    └── image_parser.py — vision LLM + base64 fallback
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
## 🧪 Development
|
|
240
|
+
|
|
241
|
+
```bash
|
|
242
|
+
# Clone and install
|
|
243
|
+
git clone https://github.com/ayanokojix21/langchain-google-classroom.git
|
|
244
|
+
cd langchain-google-classroom
|
|
245
|
+
pip install -e ".[dev]"
|
|
246
|
+
|
|
247
|
+
# Run tests
|
|
248
|
+
pytest tests/unit/ -v
|
|
249
|
+
|
|
250
|
+
# Lint
|
|
251
|
+
ruff check langchain_google_classroom/ tests/
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
## License
|
|
255
|
+
|
|
256
|
+
MIT — see [LICENSE](LICENSE) for details.
|