langchain-google-classroom 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. langchain_google_classroom-0.1.0/.github/ISSUE_TEMPLATE/bug_report.md +25 -0
  2. langchain_google_classroom-0.1.0/.github/ISSUE_TEMPLATE/feature_request.md +18 -0
  3. langchain_google_classroom-0.1.0/.github/workflows/ci.yml +47 -0
  4. langchain_google_classroom-0.1.0/.gitignore +37 -0
  5. langchain_google_classroom-0.1.0/CHANGELOG.md +37 -0
  6. langchain_google_classroom-0.1.0/CONTRIBUTING.md +77 -0
  7. langchain_google_classroom-0.1.0/LICENSE +21 -0
  8. langchain_google_classroom-0.1.0/PKG-INFO +256 -0
  9. langchain_google_classroom-0.1.0/README.md +208 -0
  10. langchain_google_classroom-0.1.0/examples/basic_usage.py +48 -0
  11. langchain_google_classroom-0.1.0/google_classroom_langchain_loader_architecture.md +469 -0
  12. langchain_google_classroom-0.1.0/langchain_google_classroom/__init__.py +19 -0
  13. langchain_google_classroom-0.1.0/langchain_google_classroom/_utilities.py +241 -0
  14. langchain_google_classroom-0.1.0/langchain_google_classroom/classroom_api.py +176 -0
  15. langchain_google_classroom-0.1.0/langchain_google_classroom/document_builder.py +231 -0
  16. langchain_google_classroom-0.1.0/langchain_google_classroom/drive_resolver.py +232 -0
  17. langchain_google_classroom-0.1.0/langchain_google_classroom/loader.py +415 -0
  18. langchain_google_classroom-0.1.0/langchain_google_classroom/normalizer.py +44 -0
  19. langchain_google_classroom-0.1.0/langchain_google_classroom/parsers/__init__.py +62 -0
  20. langchain_google_classroom-0.1.0/langchain_google_classroom/parsers/docx_parser.py +58 -0
  21. langchain_google_classroom-0.1.0/langchain_google_classroom/parsers/image_parser.py +154 -0
  22. langchain_google_classroom-0.1.0/langchain_google_classroom/parsers/pdf_parser.py +197 -0
  23. langchain_google_classroom-0.1.0/langchain_google_classroom/parsers/text_parser.py +37 -0
  24. langchain_google_classroom-0.1.0/langchain_google_classroom/py.typed +0 -0
  25. langchain_google_classroom-0.1.0/langchain_google_classroom_complete_plan.md +397 -0
  26. langchain_google_classroom-0.1.0/pyproject.toml +100 -0
  27. langchain_google_classroom-0.1.0/tests/__init__.py +0 -0
  28. langchain_google_classroom-0.1.0/tests/unit/__init__.py +0 -0
  29. langchain_google_classroom-0.1.0/tests/unit/test_classroom_api.py +156 -0
  30. langchain_google_classroom-0.1.0/tests/unit/test_document_builder.py +186 -0
  31. langchain_google_classroom-0.1.0/tests/unit/test_drive_resolver.py +250 -0
  32. langchain_google_classroom-0.1.0/tests/unit/test_loader.py +679 -0
  33. langchain_google_classroom-0.1.0/tests/unit/test_normalizer.py +39 -0
  34. langchain_google_classroom-0.1.0/tests/unit/test_parsers.py +426 -0
  35. langchain_google_classroom-0.1.0/tests/unit/test_utilities.py +301 -0
@@ -0,0 +1,25 @@
1
+ ---
2
+ name: Bug Report
3
+ about: Report a bug in langchain-google-classroom
4
+ title: "[BUG] "
5
+ labels: bug
6
+ assignees: ""
7
+ ---
8
+
9
+ ## Description
10
+ A clear description of the bug.
11
+
12
+ ## Steps to Reproduce
13
+ 1. ...
14
+ 2. ...
15
+
16
+ ## Expected Behavior
17
+ What you expected to happen.
18
+
19
+ ## Actual Behavior
20
+ What actually happened. Include error traceback if applicable.
21
+
22
+ ## Environment
23
+ - Python version:
24
+ - langchain-google-classroom version:
25
+ - OS:
@@ -0,0 +1,18 @@
1
+ ---
2
+ name: Feature Request
3
+ about: Suggest a new feature
4
+ title: "[FEATURE] "
5
+ labels: enhancement
6
+ assignees: ""
7
+ ---
8
+
9
+ ## Description
10
+ A clear description of the feature you'd like.
11
+
12
+ ## Use Case
13
+ Why this feature would be useful. What problem does it solve?
14
+
15
+ ## Proposed API
16
+ ```python
17
+ # How you'd expect to use this feature
18
+ ```
@@ -0,0 +1,47 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ permissions:
10
+ contents: read
11
+
12
+ jobs:
13
+ lint:
14
+ runs-on: ubuntu-latest
15
+ strategy:
16
+ matrix:
17
+ python-version: ["3.12"]
18
+ steps:
19
+ - uses: actions/checkout@v4
20
+ - name: Set up Python ${{ matrix.python-version }}
21
+ uses: actions/setup-python@v5
22
+ with:
23
+ python-version: ${{ matrix.python-version }}
24
+ - name: Install dependencies
25
+ run: |
26
+ python -m pip install --upgrade pip
27
+ pip install -e ".[lint]"
28
+ - name: Lint with ruff
29
+ run: ruff check langchain_google_classroom/ tests/
30
+
31
+ test:
32
+ runs-on: ubuntu-latest
33
+ strategy:
34
+ matrix:
35
+ python-version: ["3.9", "3.10", "3.11", "3.12"]
36
+ steps:
37
+ - uses: actions/checkout@v4
38
+ - name: Set up Python ${{ matrix.python-version }}
39
+ uses: actions/setup-python@v5
40
+ with:
41
+ python-version: ${{ matrix.python-version }}
42
+ - name: Install dependencies
43
+ run: |
44
+ python -m pip install --upgrade pip
45
+ pip install -e ".[dev]"
46
+ - name: Run tests
47
+ run: pytest tests/unit/ -v --tb=short
@@ -0,0 +1,37 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Distribution / packaging
7
+ dist/
8
+ build/
9
+ *.egg-info/
10
+ *.egg
11
+
12
+ # Virtual environments
13
+ .venv/
14
+ venv/
15
+ env/
16
+
17
+ # IDE
18
+ .idea/
19
+ .vscode/
20
+ *.swp
21
+ *.swo
22
+
23
+ # Testing
24
+ .pytest_cache/
25
+ .coverage
26
+ htmlcov/
27
+ .mypy_cache/
28
+
29
+ # OS
30
+ .DS_Store
31
+ Thumbs.db
32
+
33
+ # Credentials (never commit)
34
+ token.json
35
+ credentials.json
36
+ service_account.json
37
+ *.json.bak
@@ -0,0 +1,37 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and
6
+ this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [0.1.0] — 2026-03-13
9
+
10
+ ### Added
11
+
12
+ - **`GoogleClassroomLoader`** — LangChain `BaseLoader` for Google Classroom.
13
+ - Loads assignments (courseWork), announcements, and course materials.
14
+ - Selective loading via `load_assignments`, `load_announcements`, `load_materials` flags.
15
+ - Multi-course support with `course_ids` filter.
16
+ - **Drive attachment resolution** via `DriveAttachmentResolver`.
17
+ - Google-native files (Docs, Slides, Sheets) exported as plain text / CSV.
18
+ - Binary files (PDF, DOCX, etc.) downloaded via streaming.
19
+ - Controlled by `load_attachments` and `parse_attachments` flags.
20
+ - **File parser layer** using LangChain's `BaseBlobParser` + `Blob` interface.
21
+ - `PDFParser` (pypdf), `DocxParser` (python-docx), `TextParser` (built-in),
22
+ `ImageParser` (vision LLM).
23
+ - MIME-type registry with `get_parser()` factory function.
24
+ - `file_parser_cls` param for user-pluggable parsers (e.g. `PyMuPDFParser`).
25
+ - **Vision LLM image description** for images embedded in PDFs and standalone
26
+ image attachments. Pass a `vision_model` (e.g. Gemini, GPT-4V) to get
27
+ automatic image descriptions appended to page text.
28
+ - **Retry / backoff** via `execute_with_retry()` on all Google API calls.
29
+ - Exponential backoff with jitter on HTTP 429, 500, 503.
30
+ - **Authentication** support for service accounts, cached OAuth tokens, and interactive
31
+ OAuth flow.
32
+ - **Text normalizer** — NFC, line ending cleanup, null byte removal.
33
+ - **`Document` metadata** — rich metadata including course info, timestamps, due dates,
34
+ links, and parent item references for attachments.
35
+ - **Project packaging** — `pyproject.toml` with hatchling, optional dependency groups
36
+ (`[parsers]`, `[test]`, `[lint]`, `[typing]`, `[dev]`), PEP 561 `py.typed` marker.
37
+ - **92 unit tests** covering all modules with 100% pass rate.
@@ -0,0 +1,77 @@
1
+ # Contributing to langchain-google-classroom
2
+
3
+ Thank you for your interest in contributing! This guide will help you get started.
4
+
5
+ ## Development Setup
6
+
7
+ ```bash
8
+ # Clone the repository
9
+ git clone https://github.com/ayanokojix21/langchain-google-classroom.git
10
+ cd langchain-google-classroom
11
+
12
+ # Create virtual environment
13
+ python -m venv .venv
14
+ source .venv/bin/activate # Linux/macOS
15
+ # .venv\Scripts\activate # Windows
16
+
17
+ # Install in development mode
18
+ pip install -e ".[dev]"
19
+ ```
20
+
21
+ ## Running Tests
22
+
23
+ ```bash
24
+ # Run all unit tests
25
+ pytest tests/unit/ -v
26
+
27
+ # Run a specific test file
28
+ pytest tests/unit/test_parsers.py -v
29
+
30
+ # Run with coverage
31
+ pytest tests/unit/ --cov=langchain_google_classroom --cov-report=term-missing
32
+ ```
33
+
34
+ ## Linting
35
+
36
+ ```bash
37
+ # Check for lint errors
38
+ ruff check langchain_google_classroom/ tests/
39
+
40
+ # Auto-fix issues
41
+ ruff check langchain_google_classroom/ tests/ --fix
42
+ ```
43
+
44
+ ## Code Style
45
+
46
+ This project follows LangChain coding conventions:
47
+
48
+ - **Type annotations** on all functions and methods
49
+ - **Docstrings** in Google/NumPy style on all public functions
50
+ - **`from __future__ import annotations`** at the top of every module
51
+ - **`guard_import`** for optional dependencies (pypdf, python-docx)
52
+ - **`BaseBlobParser` + `Blob`** interface for all file parsers
53
+ - **`BaseLoader.lazy_load()`** as the main entry point
54
+
55
+ ## Adding a New Parser
56
+
57
+ 1. Create `langchain_google_classroom/parsers/your_parser.py`
58
+ 2. Implement `BaseBlobParser.lazy_parse(blob)` → `Iterator[Document]`
59
+ 3. Add the MIME type mapping in `parsers/__init__.py`
60
+ 4. Add tests in `tests/unit/test_parsers.py`
61
+ 5. Run `pytest` and `ruff check`
62
+
63
+ ## Pull Request Process
64
+
65
+ 1. Fork the repository
66
+ 2. Create a feature branch (`git checkout -b feature/my-feature`)
67
+ 3. Make your changes with tests
68
+ 4. Run `pytest tests/unit/ -v` and `ruff check`
69
+ 5. Commit with a descriptive message
70
+ 6. Push and open a Pull Request
71
+
72
+ ## Reporting Issues
73
+
74
+ Use [GitHub Issues](https://github.com/ayanokojix21/langchain-google-classroom/issues) with:
75
+
76
+ - **Bug reports**: Include Python version, error traceback, and steps to reproduce
77
+ - **Feature requests**: Describe the use case and proposed API
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Nishchal Chandel
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,256 @@
1
+ Metadata-Version: 2.4
2
+ Name: langchain-google-classroom
3
+ Version: 0.1.0
4
+ Summary: An integration package connecting Google Classroom and LangChain
5
+ Project-URL: Homepage, https://github.com/ayanokojix21/langchain-google-classroom
6
+ Project-URL: Source, https://github.com/ayanokojix21/langchain-google-classroom
7
+ Project-URL: Documentation, https://github.com/ayanokojix21/langchain-google-classroom#readme
8
+ Author: Nishchal Chandel
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: document-loader,education,google-classroom,langchain,rag
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Education
21
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
+ Requires-Python: >=3.9
23
+ Requires-Dist: google-api-python-client<3.0.0,>=2.100.0
24
+ Requires-Dist: google-auth-httplib2<1.0.0,>=0.2.0
25
+ Requires-Dist: google-auth-oauthlib<2.0.0,>=1.2.0
26
+ Requires-Dist: google-auth<3.0.0,>=2.25.0
27
+ Requires-Dist: langchain-core<1.0.0,>=0.3.0
28
+ Provides-Extra: dev
29
+ Requires-Dist: mypy<2.0.0,>=1.10.0; extra == 'dev'
30
+ Requires-Dist: pypdf<5.0.0,>=4.0.0; extra == 'dev'
31
+ Requires-Dist: pytest-mock<4.0.0,>=3.10.0; extra == 'dev'
32
+ Requires-Dist: pytest<10.0.0,>=7.3.0; extra == 'dev'
33
+ Requires-Dist: python-docx<2.0.0,>=1.0.0; extra == 'dev'
34
+ Requires-Dist: ruff<1,>=0.5.0; extra == 'dev'
35
+ Provides-Extra: lint
36
+ Requires-Dist: ruff<1,>=0.5.0; extra == 'lint'
37
+ Provides-Extra: parsers
38
+ Requires-Dist: pypdf<5.0.0,>=4.0.0; extra == 'parsers'
39
+ Requires-Dist: python-docx<2.0.0,>=1.0.0; extra == 'parsers'
40
+ Provides-Extra: test
41
+ Requires-Dist: pytest-asyncio<2.0.0,>=0.21.1; extra == 'test'
42
+ Requires-Dist: pytest-mock<4.0.0,>=3.10.0; extra == 'test'
43
+ Requires-Dist: pytest-socket<1.0.0,>=0.7.0; extra == 'test'
44
+ Requires-Dist: pytest<10.0.0,>=7.3.0; extra == 'test'
45
+ Provides-Extra: typing
46
+ Requires-Dist: mypy<2.0.0,>=1.10.0; extra == 'typing'
47
+ Description-Content-Type: text/markdown
48
+
49
+ # 🎓 langchain-google-classroom
50
+
51
+ [![CI](https://github.com/ayanokojix21/langchain-google-classroom/actions/workflows/ci.yml/badge.svg)](https://github.com/ayanokojix21/langchain-google-classroom/actions/workflows/ci.yml)
52
+ [![PyPI version](https://img.shields.io/pypi/v/langchain-google-classroom.svg)](https://pypi.org/project/langchain-google-classroom/)
53
+ [![Python](https://img.shields.io/pypi/pyversions/langchain-google-classroom.svg)](https://pypi.org/project/langchain-google-classroom/)
54
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
55
+
56
+ A **LangChain** integration package that loads Google Classroom content — assignments, announcements, course materials, and Drive attachments — as `Document` objects for RAG pipelines, semantic search, AI teaching assistants, and course chatbots.
57
+
58
+ ## ✨ Features
59
+
60
+ - **Full Classroom coverage** — assignments, announcements, and course materials
61
+ - **Drive attachments** — auto-download and parse PDF, DOCX, text, CSV, HTML files
62
+ - **Vision LLM image description** — embedded PDF images described by Gemini/GPT-4V
63
+ - **Pluggable parsers** — bring your own `BaseBlobParser` (PyMuPDF, Unstructured, etc.)
64
+ - **Retry/backoff** — exponential backoff with jitter on rate-limited API calls
65
+ - **Flexible auth** — service accounts, OAuth, cached tokens, or pre-built credentials
66
+ - **Rich metadata** — course info, timestamps, due dates, links on every Document
67
+ - **Lazy loading** — memory-efficient streaming via `lazy_load()`
68
+
69
+ ## 📦 Installation
70
+
71
+ ```bash
72
+ pip install langchain-google-classroom
73
+ ```
74
+
75
+ With file attachment parsing (PDF, DOCX):
76
+
77
+ ```bash
78
+ pip install "langchain-google-classroom[parsers]"
79
+ ```
80
+
81
+ ## 🚀 Quickstart
82
+
83
+ ```python
84
+ from langchain_google_classroom import GoogleClassroomLoader
85
+
86
+ # Load all accessible courses
87
+ loader = GoogleClassroomLoader()
88
+ docs = loader.load()
89
+
90
+ for doc in docs:
91
+ print(doc.metadata["content_type"], "—", doc.metadata["title"])
92
+ print(doc.page_content[:200])
93
+ print()
94
+ ```
95
+
96
+ ## 🔐 Authentication
97
+
98
+ ### Service Account (recommended for production)
99
+
100
+ ```python
101
+ loader = GoogleClassroomLoader(
102
+ service_account_file="service_account.json",
103
+ )
104
+ ```
105
+
106
+ ### OAuth User Credentials
107
+
108
+ ```python
109
+ loader = GoogleClassroomLoader(
110
+ client_secrets_file="credentials.json",
111
+ token_file="token.json",
112
+ )
113
+ ```
114
+
115
+ ### Pre-built Credentials
116
+
117
+ ```python
118
+ from google.oauth2 import service_account
119
+
120
+ creds = service_account.Credentials.from_service_account_file(
121
+ "service_account.json",
122
+ scopes=["https://www.googleapis.com/auth/classroom.courses.readonly"],
123
+ )
124
+ loader = GoogleClassroomLoader(credentials=creds)
125
+ ```
126
+
127
+ ## 📎 Attachments & File Parsing
128
+
129
+ ```python
130
+ loader = GoogleClassroomLoader(
131
+ course_ids=["123456789"],
132
+ load_attachments=True, # Download Drive files
133
+ parse_attachments=True, # Parse with BaseBlobParser
134
+ )
135
+ docs = loader.load()
136
+ # Yields: assignment docs + parsed PDF/DOCX/text attachment docs
137
+ ```
138
+
139
+ ### Custom Parser
140
+
141
+ ```python
142
+ from langchain_community.document_loaders.parsers.pdf import PyMuPDFParser
143
+
144
+ loader = GoogleClassroomLoader(
145
+ course_ids=["123456789"],
146
+ file_parser_cls=PyMuPDFParser,
147
+ )
148
+ ```
149
+
150
+ ## 🖼️ Vision LLM — Image Description
151
+
152
+ Extract and describe images embedded in PDFs using any vision-capable LLM:
153
+
154
+ ```python
155
+ from langchain_google_genai import ChatGoogleGenerativeAI
156
+
157
+ loader = GoogleClassroomLoader(
158
+ course_ids=["123456789"],
159
+ load_attachments=True,
160
+ vision_model=ChatGoogleGenerativeAI(model="gemini-2.0-flash"),
161
+ )
162
+ docs = loader.load()
163
+ # PDF pages now include: "[Image: chart.png]\nA bar chart showing student grades..."
164
+ ```
165
+
166
+ ## 🎯 Selective Loading
167
+
168
+ ```python
169
+ loader = GoogleClassroomLoader(
170
+ course_ids=["123456789"],
171
+ load_assignments=True,
172
+ load_announcements=False,
173
+ load_materials=False,
174
+ load_attachments=False,
175
+ )
176
+ ```
177
+
178
+ ## 📄 Document Structure
179
+
180
+ Each document includes rich metadata:
181
+
182
+ ```python
183
+ Document(
184
+ page_content="Assignment: Homework 3\n\nComplete exercises 1-5...",
185
+ metadata={
186
+ "source": "google_classroom",
187
+ "course_id": "12345",
188
+ "course_name": "Machine Learning",
189
+ "content_type": "assignment", # or "announcement", "material", "assignment_attachment"
190
+ "title": "Homework 3",
191
+ "item_id": "67890",
192
+ "created_time": "2024-01-15T10:00:00Z",
193
+ "updated_time": "2024-01-15T10:00:00Z",
194
+ "due_date": "2024-01-22T23:59:00", # assignments only
195
+ "max_points": 100.0, # assignments only
196
+ "alternate_link": "https://classroom.google.com/...",
197
+ }
198
+ )
199
+ ```
200
+
201
+ ## ⚙️ Configuration Reference
202
+
203
+ | Parameter | Type | Default | Description |
204
+ |-----------|------|---------|-------------|
205
+ | `course_ids` | `list[str]` | `None` | Specific course IDs (`None` = all accessible) |
206
+ | `load_assignments` | `bool` | `True` | Load courseWork items |
207
+ | `load_announcements` | `bool` | `True` | Load announcements |
208
+ | `load_materials` | `bool` | `True` | Load courseWorkMaterials |
209
+ | `load_attachments` | `bool` | `True` | Download and process Drive attachments |
210
+ | `parse_attachments` | `bool` | `True` | Parse files with BaseBlobParser |
211
+ | `load_images` | `bool` | `False` | Process image MIME types |
212
+ | `vision_model` | `BaseChatModel` | `None` | Vision LLM for image description |
213
+ | `image_prompt` | `str` | `None` | Custom prompt for vision model |
214
+ | `file_parser_cls` | `type[BaseBlobParser]` | `None` | Custom parser for all attachments |
215
+ | `file_parser_kwargs` | `dict` | `None` | kwargs for custom parser |
216
+ | `credentials` | `Credentials` | `None` | Pre-built Google credentials |
217
+ | `service_account_file` | `str` | `None` | Service account key JSON path |
218
+ | `token_file` | `str` | `None` | Cached OAuth token path |
219
+ | `client_secrets_file` | `str` | `None` | OAuth client secrets path |
220
+ | `scopes` | `list[str]` | Read-only | API scopes to request |
221
+
222
+ ## 🏗️ Architecture
223
+
224
+ ```
225
+ GoogleClassroomLoader (BaseLoader)
226
+ ├── _utilities.py — auth, retry/backoff, guard_import
227
+ ├── classroom_api.py — paginated Classroom API fetcher
228
+ ├── document_builder.py — raw API → LangChain Document
229
+ ├── drive_resolver.py — Drive download/export
230
+ ├── normalizer.py — text cleanup (Unicode NFC, whitespace)
231
+ └── parsers/
232
+     ├── __init__.py — MIME registry + get_parser()
233
+     ├── pdf_parser.py — pypdf + vision LLM
234
+     ├── docx_parser.py — python-docx
235
+     ├── text_parser.py — built-in UTF-8
236
+     └── image_parser.py — vision LLM + base64 fallback
237
+ ```
238
+
239
+ ## 🧪 Development
240
+
241
+ ```bash
242
+ # Clone and install
243
+ git clone https://github.com/ayanokojix21/langchain-google-classroom.git
244
+ cd langchain-google-classroom
245
+ pip install -e ".[dev]"
246
+
247
+ # Run tests
248
+ pytest tests/unit/ -v
249
+
250
+ # Lint
251
+ ruff check langchain_google_classroom/ tests/
252
+ ```
253
+
254
+ ## 📝 License
255
+
256
+ MIT — see [LICENSE](LICENSE) for details.