clawed 2.0.4__tar.gz → 2.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {clawed-2.0.4 → clawed-2.1.1}/PKG-INFO +1 -1
- {clawed-2.0.4 → clawed-2.1.1}/clawed/__init__.py +1 -1
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/tools/generate_lesson_bundle.py +34 -18
- clawed-2.1.1/clawed/agent_core/tools/search_my_materials.py +149 -0
- clawed-2.1.1/clawed/asset_registry.py +409 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/commands/generate.py +83 -24
- {clawed-2.0.4 → clawed-2.1.1}/clawed/ingestor.py +249 -1
- {clawed-2.0.4 → clawed-2.1.1}/clawed/slide_images.py +54 -4
- {clawed-2.0.4 → clawed-2.1.1}/pyproject.toml +1 -1
- clawed-2.0.4/clawed/agent_core/tools/search_my_materials.py +0 -97
- {clawed-2.0.4 → clawed-2.1.1}/.gitignore +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/LICENSE +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/README.md +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/__main__.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/_legacy_gateway.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/__init__.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/approvals.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/autonomy.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/context.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/core.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/custom_tools.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/drive/__init__.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/drive/auth.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/drive/client.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/fake_llm.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/loop.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/memory/__init__.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/memory/curriculum.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/memory/curriculum_kb.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/memory/embeddings.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/memory/episodes.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/memory/identity.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/memory/loader.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/memory/preferences.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/planner.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/prompt.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/scheduler.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/tools/__init__.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/tools/base.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/tools/configure_profile.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/tools/curriculum_map.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/tools/drive_create_doc.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/tools/drive_create_slides.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/tools/drive_list.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/tools/drive_organize.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/tools/drive_read.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/tools/drive_upload.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/tools/export_document.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/tools/gap_analysis.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/tools/generate_assessment.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/tools/generate_lesson.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/tools/generate_materials.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/tools/generate_unit.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/tools/ingest_materials.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/tools/parent_comm.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/tools/read_heartbeat.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/tools/read_workspace.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/tools/request_approval.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/tools/schedule_task.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/tools/search_lessons.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/tools/search_standards.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/tools/student_insights.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/tools/sub_packet.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/tools/switch_model.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/agent_core/tools/update_soul.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/analytics.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/api/__init__.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/api/deps.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/api/routes/__init__.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/api/routes/chat.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/api/routes/export.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/api/routes/feedback.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/api/routes/gateway_chat.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/api/routes/generate.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/api/routes/ingest.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/api/routes/lessons.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/api/routes/school.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/api/routes/settings.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/api/routes/tools.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/api/server.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/api/static/app.js +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/api/static/style.css +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/api/static/widget.js +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/api/templates/analytics.html +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/api/templates/base.html +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/api/templates/dashboard.html +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/api/templates/generate.html +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/api/templates/index.html +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/api/templates/lesson.html +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/api/templates/profile.html +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/api/templates/settings.html +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/api/templates/stats.html +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/api/templates/students.html +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/assessment.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/auth/__init__.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/auth/google_auth.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/bot_state.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/chat.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/cli.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/cli_chat.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/commands/__init__.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/commands/_helpers.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/commands/bot.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/commands/config.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/commands/config_llm.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/commands/config_profile.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/commands/export.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/commands/generate_assessment.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/commands/generate_unit.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/commands/queue.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/commands/schedule_cmd.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/commands/sub.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/commands/workspace_cmd.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/config.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/corpus.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/curriculum_map.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/database.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/demo/__init__.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/demo/demo_assessment.json +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/demo/demo_lesson_science_g6.json +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/demo/demo_lesson_social_studies_g8.json +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/demo/demo_unit_plan.json +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/differentiation.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/doc_export.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/drive.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/evaluation.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/export_docx.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/export_handout.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/export_markdown.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/export_pdf.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/export_pptx.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/export_templates.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/export_theme.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/exporter.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/feedback.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/formats/__init__.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/formats/flipchart.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/formats/notebook.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/formats/xbk.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/gateway.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/gateway_response.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/generation.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/handlers/__init__.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/handlers/export.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/handlers/feedback.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/handlers/gaps.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/handlers/generate.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/handlers/ingest.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/handlers/misc.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/handlers/onboard.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/handlers/schedule.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/handlers/standards.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/improver.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/io.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/lesson.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/llm.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/materials.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/mcp_server.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/memory_engine.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/model_router.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/models.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/onboarding.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/openclaw_plugin.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/parent_comm.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/persona.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/planner.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/prompts/504_accommodations.txt +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/prompts/assessment.txt +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/prompts/curriculum_gaps.txt +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/prompts/dbq_assessment.txt +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/prompts/differentiation.txt +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/prompts/formative_assessment.txt +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/prompts/iep_modification.txt +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/prompts/lesson_plan.txt +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/prompts/pacing_guide.txt +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/prompts/parent_note.txt +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/prompts/persona_extract.txt +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/prompts/quiz.txt +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/prompts/rubric.txt +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/prompts/sub_packet.txt +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/prompts/summative_assessment.txt +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/prompts/tiered_assignments.txt +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/prompts/unit_plan.txt +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/prompts/worksheet.txt +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/prompts/year_map.txt +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/quality.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/reading_report.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/router.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/sanitize.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/scheduler.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/school.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/search.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/skills/__init__.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/skills/art.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/skills/base.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/skills/computer_science.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/skills/ela.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/skills/foreign_language.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/skills/history.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/skills/library.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/skills/math.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/skills/music.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/skills/physical_education.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/skills/science.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/skills/social_studies.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/skills/special_education.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/standards.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/state.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/state_standards.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/student_bot.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/student_cli.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/student_telegram_bot.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/sub_packet.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/task_queue.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/templates_lib.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/tools.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/transports/__init__.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/transports/cli.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/transports/openclaw.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/transports/student_telegram.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/transports/telegram.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/transports/web.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/tui.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/tui_chat.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/voice.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/clawed/workspace.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/eduagent/__init__.py +0 -0
- {clawed-2.0.4 → clawed-2.1.1}/eduagent/_compat.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: clawed
|
|
3
|
-
Version: 2.0.4
|
|
3
|
+
Version: 2.1.1
|
|
4
4
|
Summary: Claw-ED — personal AI teaching agent. Learns your voice, works while you sleep.
|
|
5
5
|
Project-URL: Homepage, https://github.com/SirhanMacx/Claw-ED
|
|
6
6
|
Project-URL: Documentation, https://github.com/SirhanMacx/Claw-ED#readme
|
|
@@ -17,7 +17,7 @@ if hasattr(sys.stderr, "reconfigure"):
|
|
|
17
17
|
except Exception:
|
|
18
18
|
pass
|
|
19
19
|
|
|
20
|
-
__version__ = "2.0.4"
|
|
20
|
+
__version__ = "2.1.1"
|
|
21
21
|
__author__ = "Jon Maccarello & Claw-ED contributors"
|
|
22
22
|
__description__ = "Personal AI teaching agent. Learns your voice, works while you sleep."
|
|
23
23
|
|
|
@@ -104,18 +104,32 @@ class GenerateLessonBundleTool:
|
|
|
104
104
|
topic=topic,
|
|
105
105
|
)
|
|
106
106
|
|
|
107
|
-
# ── Search
|
|
107
|
+
# ── Search for teacher's existing materials (assets + KB) ─────
|
|
108
108
|
kb_context = ""
|
|
109
109
|
kb_prompt_section = ""
|
|
110
|
+
|
|
111
|
+
# Asset-level search (complete files, YouTube links)
|
|
112
|
+
try:
|
|
113
|
+
from clawed.asset_registry import AssetRegistry
|
|
114
|
+
registry = AssetRegistry()
|
|
115
|
+
assets = registry.search_assets(context.teacher_id, topic, top_k=5)
|
|
116
|
+
yt_links = registry.get_youtube_links(context.teacher_id, topic, top_k=3)
|
|
117
|
+
if assets or yt_links:
|
|
118
|
+
kb_prompt_section = registry.format_asset_summary(assets, yt_links)
|
|
119
|
+
logger.info(
|
|
120
|
+
"Asset search found %d files, %d YouTube links for '%s'",
|
|
121
|
+
len(assets), len(yt_links), topic,
|
|
122
|
+
)
|
|
123
|
+
except Exception as e:
|
|
124
|
+
logger.debug("Asset search failed: %s", e)
|
|
125
|
+
|
|
126
|
+
# KB chunk-level search (text excerpts)
|
|
110
127
|
try:
|
|
111
128
|
from clawed.agent_core.memory.curriculum_kb import CurriculumKB
|
|
112
129
|
kb = CurriculumKB()
|
|
113
130
|
kb_results = kb.search(context.teacher_id, topic, top_k=3)
|
|
114
131
|
if kb_results:
|
|
115
|
-
kb_parts = []
|
|
116
|
-
for r in kb_results:
|
|
117
|
-
if r.get("similarity", 0) > 0.1:
|
|
118
|
-
kb_parts.append(r)
|
|
132
|
+
kb_parts = [r for r in kb_results if r.get("similarity", 0) > 0.1]
|
|
119
133
|
if kb_parts:
|
|
120
134
|
kb_context = (
|
|
121
135
|
"\n\nRelevant materials from the teacher's files:\n"
|
|
@@ -124,20 +138,22 @@ class GenerateLessonBundleTool:
|
|
|
124
138
|
for r in kb_parts
|
|
125
139
|
)
|
|
126
140
|
)
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
"The teacher has created content on this topic before. "
|
|
131
|
-
"Reference and build on their existing work:\n\n"
|
|
132
|
-
+ "\n\n".join(
|
|
133
|
-
f"From \"{r['doc_title']}\":\n{r['chunk_text'][:500]}"
|
|
134
|
-
for r in kb_parts
|
|
135
|
-
)
|
|
136
|
-
+ "\n\nUse these materials as a foundation. Reference the teacher's existing "
|
|
137
|
-
"lessons, reuse their graphic organizer formats, build on their approach. "
|
|
138
|
-
"If the teacher has taught this topic before, extend their work — don't "
|
|
139
|
-
"start from scratch."
|
|
141
|
+
chunk_section = "\n\n".join(
|
|
142
|
+
f"From \"{r['doc_title']}\":\n{r['chunk_text'][:500]}"
|
|
143
|
+
for r in kb_parts
|
|
140
144
|
)
|
|
145
|
+
if kb_prompt_section:
|
|
146
|
+
kb_prompt_section += "\n\n" + chunk_section
|
|
147
|
+
else:
|
|
148
|
+
kb_prompt_section = (
|
|
149
|
+
"Teacher's Existing Materials on This Topic\n"
|
|
150
|
+
"The teacher has created content on this topic before. "
|
|
151
|
+
"Reference and build on their existing work:\n\n"
|
|
152
|
+
+ chunk_section
|
|
153
|
+
+ "\n\nUse these materials as a foundation. "
|
|
154
|
+
"Reference the teacher's existing lessons, reuse their "
|
|
155
|
+
"graphic organizer formats, build on their approach."
|
|
156
|
+
)
|
|
141
157
|
logger.info("KB search found %d relevant chunks for '%s'", len(kb_parts), topic)
|
|
142
158
|
except Exception as e:
|
|
143
159
|
logger.debug("KB search failed: %s", e)
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"""Tool: search_my_materials — search the teacher's uploaded curriculum files.
|
|
2
|
+
|
|
3
|
+
This is the key tool that makes Claw-ED curriculum-aware. The agent calls
|
|
4
|
+
this before generating to find relevant prior work in the teacher's own
|
|
5
|
+
uploaded materials. Now includes asset-level awareness (slideshows, handouts,
|
|
6
|
+
YouTube links) alongside text chunk search.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
from clawed.agent_core.context import AgentContext, ToolResult
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class SearchMyMaterialsTool:
|
|
18
|
+
"""Search the teacher's curriculum knowledge base for relevant content."""
|
|
19
|
+
|
|
20
|
+
def schema(self) -> dict[str, Any]:
|
|
21
|
+
return {
|
|
22
|
+
"type": "function",
|
|
23
|
+
"function": {
|
|
24
|
+
"name": "search_my_materials",
|
|
25
|
+
"description": (
|
|
26
|
+
"Search the teacher's uploaded curriculum files for relevant "
|
|
27
|
+
"content. Use this BEFORE generating lessons, units, or materials "
|
|
28
|
+
"to ground your output in the teacher's own prior work. Returns "
|
|
29
|
+
"matching files (slideshows, handouts, assessments), YouTube links, "
|
|
30
|
+
"and text excerpts with source file attribution."
|
|
31
|
+
),
|
|
32
|
+
"parameters": {
|
|
33
|
+
"type": "object",
|
|
34
|
+
"properties": {
|
|
35
|
+
"query": {
|
|
36
|
+
"type": "string",
|
|
37
|
+
"description": (
|
|
38
|
+
"What to search for — a topic, concept, or question. "
|
|
39
|
+
"Example: 'Civil War causes', 'photosynthesis lab', "
|
|
40
|
+
"'fractions worksheet'"
|
|
41
|
+
),
|
|
42
|
+
},
|
|
43
|
+
"top_k": {
|
|
44
|
+
"type": "integer",
|
|
45
|
+
"description": "Maximum results to return (default 5)",
|
|
46
|
+
"default": 5,
|
|
47
|
+
},
|
|
48
|
+
},
|
|
49
|
+
"required": ["query"],
|
|
50
|
+
},
|
|
51
|
+
},
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
async def execute(
|
|
55
|
+
self, params: dict[str, Any], context: AgentContext
|
|
56
|
+
) -> ToolResult:
|
|
57
|
+
query = params["query"]
|
|
58
|
+
top_k = params.get("top_k", 5)
|
|
59
|
+
teacher_id = context.teacher_id
|
|
60
|
+
|
|
61
|
+
lines: list[str] = []
|
|
62
|
+
|
|
63
|
+
# ── Asset-level search (files, YouTube links) ──────────────
|
|
64
|
+
try:
|
|
65
|
+
from clawed.asset_registry import AssetRegistry
|
|
66
|
+
registry = AssetRegistry()
|
|
67
|
+
assets = registry.search_assets(teacher_id, query, top_k=top_k)
|
|
68
|
+
yt_links = registry.get_youtube_links(teacher_id, query, top_k=3)
|
|
69
|
+
|
|
70
|
+
if assets:
|
|
71
|
+
lines.append("EXISTING MATERIALS:\n")
|
|
72
|
+
for i, a in enumerate(assets, 1):
|
|
73
|
+
type_label = a["material_type"].replace("_", " ").title()
|
|
74
|
+
extras: list[str] = []
|
|
75
|
+
if a.get("slide_count"):
|
|
76
|
+
extras.append(f"{a['slide_count']} slides")
|
|
77
|
+
if a.get("image_count"):
|
|
78
|
+
extras.append(f"{a['image_count']} images")
|
|
79
|
+
yt_raw = a.get("youtube_urls", [])
|
|
80
|
+
yt_list = json.loads(yt_raw) if isinstance(yt_raw, str) else yt_raw
|
|
81
|
+
yt_count = len(yt_list)
|
|
82
|
+
if yt_count:
|
|
83
|
+
extras.append(f"{yt_count} YouTube links")
|
|
84
|
+
extra_str = f" ({', '.join(extras)})" if extras else ""
|
|
85
|
+
lines.append(
|
|
86
|
+
f" {i}. [{type_label}] \"{a['title']}\"{extra_str}\n"
|
|
87
|
+
f" File: {a['filename']}\n"
|
|
88
|
+
)
|
|
89
|
+
|
|
90
|
+
if yt_links:
|
|
91
|
+
lines.append("YOUTUBE LINKS IN YOUR FILES:\n")
|
|
92
|
+
for link in yt_links:
|
|
93
|
+
lines.append(f" - {link['url']} (from \"{link['from_file']}\")\n")
|
|
94
|
+
|
|
95
|
+
except Exception:
|
|
96
|
+
pass
|
|
97
|
+
|
|
98
|
+
# ── Chunk-level search (text excerpts) ─────────────────────
|
|
99
|
+
try:
|
|
100
|
+
from clawed.agent_core.memory.curriculum_kb import CurriculumKB
|
|
101
|
+
|
|
102
|
+
kb = CurriculumKB()
|
|
103
|
+
results = kb.search(teacher_id, query, top_k=top_k)
|
|
104
|
+
|
|
105
|
+
if not results and not lines:
|
|
106
|
+
stats = kb.stats(teacher_id)
|
|
107
|
+
if stats["doc_count"] == 0:
|
|
108
|
+
return ToolResult(
|
|
109
|
+
text="No curriculum files uploaded yet. Ask the teacher "
|
|
110
|
+
"to share their lesson plans, handouts, or other "
|
|
111
|
+
"teaching materials so you can reference them."
|
|
112
|
+
)
|
|
113
|
+
return ToolResult(
|
|
114
|
+
text=f"No matches found for '{query}' in "
|
|
115
|
+
f"{stats['doc_count']} uploaded documents."
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
if results:
|
|
119
|
+
lines.append("RELEVANT EXCERPTS:\n")
|
|
120
|
+
for i, r in enumerate(results, 1):
|
|
121
|
+
source = r["doc_title"]
|
|
122
|
+
if r.get("source_path"):
|
|
123
|
+
fname = Path(r["source_path"]).name
|
|
124
|
+
source = f"{r['doc_title']} ({fname})"
|
|
125
|
+
sim_pct = int(r["similarity"] * 100)
|
|
126
|
+
text_preview = r["chunk_text"][:300]
|
|
127
|
+
if len(r["chunk_text"]) > 300:
|
|
128
|
+
text_preview += "..."
|
|
129
|
+
lines.append(
|
|
130
|
+
f" {i}. From '{source}' ({sim_pct}% match):\n"
|
|
131
|
+
f" {text_preview}\n"
|
|
132
|
+
)
|
|
133
|
+
|
|
134
|
+
except Exception as e:
|
|
135
|
+
if not lines:
|
|
136
|
+
return ToolResult(text=f"Failed to search curriculum files: {e}")
|
|
137
|
+
|
|
138
|
+
if lines:
|
|
139
|
+
header = f"Found materials related to \"{query}\":\n\n"
|
|
140
|
+
lines.append(
|
|
141
|
+
"\nWould you like me to use these existing materials, "
|
|
142
|
+
"enhance them, or create something new?"
|
|
143
|
+
)
|
|
144
|
+
return ToolResult(
|
|
145
|
+
text=header + "\n".join(lines),
|
|
146
|
+
data={"query": query},
|
|
147
|
+
)
|
|
148
|
+
|
|
149
|
+
return ToolResult(text=f"No materials found for '{query}'.")
|
|
@@ -0,0 +1,409 @@
|
|
|
1
|
+
"""Asset registry — file-level awareness of teacher's materials.
|
|
2
|
+
|
|
3
|
+
Sits alongside the curriculum KB (same SQLite database). While the KB stores
|
|
4
|
+
text chunks for semantic search, the asset registry stores one row per *file*
|
|
5
|
+
with rich metadata: material type, embedded images, YouTube links, slide counts.
|
|
6
|
+
|
|
7
|
+
This powers the "I found your Reconstruction PPT from 2020" experience.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import hashlib
|
|
12
|
+
import json
|
|
13
|
+
import logging
|
|
14
|
+
import re
|
|
15
|
+
import sqlite3
|
|
16
|
+
from dataclasses import dataclass, field
|
|
17
|
+
from datetime import datetime
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Any
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
_DEFAULT_DB = Path.home() / ".eduagent" / "memory" / "curriculum_kb.db"
|
|
24
|
+
|
|
25
|
+
# ── YouTube URL normalization ────────────────────────────────────────
|
|
26
|
+
|
|
27
|
+
_YT_PATTERNS = [
|
|
28
|
+
re.compile(r'(?:https?://)?(?:www\.)?youtube\.com/watch\?v=([a-zA-Z0-9_-]{11})'),
|
|
29
|
+
re.compile(r'(?:https?://)?youtu\.be/([a-zA-Z0-9_-]{11})'),
|
|
30
|
+
re.compile(r'(?:https?://)?(?:www\.)?youtube\.com/embed/([a-zA-Z0-9_-]{11})'),
|
|
31
|
+
re.compile(r'(?:https?://)?m\.youtube\.com/watch\?v=([a-zA-Z0-9_-]{11})'),
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
_URL_PATTERN = re.compile(r'https?://[^\s<>"\')\]]+')
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def extract_youtube_ids(text: str) -> list[str]:
|
|
38
|
+
"""Extract unique YouTube video IDs from text."""
|
|
39
|
+
ids: list[str] = []
|
|
40
|
+
for pat in _YT_PATTERNS:
|
|
41
|
+
for m in pat.finditer(text):
|
|
42
|
+
vid = m.group(1)
|
|
43
|
+
if vid not in ids:
|
|
44
|
+
ids.append(vid)
|
|
45
|
+
return ids
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def extract_urls(text: str) -> list[str]:
|
|
49
|
+
"""Extract all URLs from text."""
|
|
50
|
+
return _URL_PATTERN.findall(text)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def classify_url(url: str) -> str:
|
|
54
|
+
"""Classify a URL type."""
|
|
55
|
+
lower = url.lower()
|
|
56
|
+
if 'youtube.com' in lower or 'youtu.be' in lower:
|
|
57
|
+
return 'youtube'
|
|
58
|
+
if 'docs.google.com' in lower or 'drive.google.com' in lower:
|
|
59
|
+
return 'google_doc'
|
|
60
|
+
return 'website'
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# ── Material type classification ─────────────────────────────────────
|
|
64
|
+
|
|
65
|
+
_ASSESSMENT_KEYWORDS = {'test', 'quiz', 'exam', 'assessment', 'midterm', 'final', 'regents'}
|
|
66
|
+
_HANDOUT_KEYWORDS = {'handout', 'worksheet', 'graphic organizer', 'organizer', 'guided notes'}
|
|
67
|
+
_UNIT_PLAN_KEYWORDS = {'unit plan', 'essential questions', 'enduring understandings', 'pacing'}
|
|
68
|
+
_LESSON_PLAN_KEYWORDS = {'lesson plan', 'objective', 'do now', 'exit ticket', 'swbat'}
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def classify_material_type(
|
|
72
|
+
doc_type: str, text: str, filename: str, slide_count: int | None = None,
|
|
73
|
+
) -> str:
|
|
74
|
+
"""Classify a document into a material type using heuristics."""
|
|
75
|
+
lower_fn = filename.lower()
|
|
76
|
+
lower_text = text[:2000].lower()
|
|
77
|
+
|
|
78
|
+
if doc_type == 'pptx':
|
|
79
|
+
if slide_count and slide_count >= 8:
|
|
80
|
+
return 'slideshow'
|
|
81
|
+
if slide_count and slide_count <= 3:
|
|
82
|
+
return 'fragment'
|
|
83
|
+
return 'slideshow'
|
|
84
|
+
|
|
85
|
+
combined = lower_fn + " " + lower_text
|
|
86
|
+
|
|
87
|
+
if any(kw in combined for kw in _ASSESSMENT_KEYWORDS):
|
|
88
|
+
return 'assessment'
|
|
89
|
+
if any(kw in combined for kw in _HANDOUT_KEYWORDS):
|
|
90
|
+
return 'handout'
|
|
91
|
+
if any(kw in combined for kw in _UNIT_PLAN_KEYWORDS):
|
|
92
|
+
return 'unit_plan'
|
|
93
|
+
if any(kw in combined for kw in _LESSON_PLAN_KEYWORDS):
|
|
94
|
+
return 'lesson_plan'
|
|
95
|
+
if doc_type == 'docx':
|
|
96
|
+
return 'notes'
|
|
97
|
+
return 'unknown'
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# ── Extracted metadata dataclasses ───────────────────────────────────
|
|
101
|
+
|
|
102
|
+
@dataclass
|
|
103
|
+
class ExtractedImage:
|
|
104
|
+
"""An image extracted from a teaching document."""
|
|
105
|
+
image_bytes: bytes
|
|
106
|
+
format: str # 'png', 'jpeg', 'gif'
|
|
107
|
+
width: int | None = None
|
|
108
|
+
height: int | None = None
|
|
109
|
+
alt_text: str = ''
|
|
110
|
+
context_text: str = ''
|
|
111
|
+
slide_number: int | None = None
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
@dataclass
|
|
115
|
+
class ExtractedURL:
|
|
116
|
+
"""A URL found in a teaching document."""
|
|
117
|
+
url: str
|
|
118
|
+
link_type: str # 'youtube', 'website', 'google_doc'
|
|
119
|
+
context_text: str = ''
|
|
120
|
+
title_hint: str = ''
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
@dataclass
|
|
124
|
+
class ExtractionResult:
|
|
125
|
+
"""Rich extraction result from a document."""
|
|
126
|
+
text: str
|
|
127
|
+
page_count: int | None = None
|
|
128
|
+
slide_count: int | None = None
|
|
129
|
+
images: list[ExtractedImage] = field(default_factory=list)
|
|
130
|
+
urls: list[ExtractedURL] = field(default_factory=list)
|
|
131
|
+
word_count: int = 0
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
# ── Asset Registry ───────────────────────────────────────────────────
|
|
135
|
+
|
|
136
|
+
class AssetRegistry:
    """File-level asset registry for teacher's materials.

    Tracks complete files (slideshows, handouts, assessments) with metadata
    about embedded images, YouTube links, material type, and completeness.

    Data is kept in a SQLite database with three tables: ``assets`` (one row
    per registered file, unique per teacher and content hash),
    ``asset_images`` and ``asset_links`` (per-asset image and URL rows).
    Every method opens its own short-lived connection and closes it again.
    """

    def __init__(self, db_path: Path | None = None):
        """Create the registry, making the database and its schema if needed.

        Args:
            db_path: Location of the SQLite file. The module-level
                ``_DEFAULT_DB`` is used when omitted.
        """
        self._db_path = db_path or _DEFAULT_DB
        self._db_path.parent.mkdir(parents=True, exist_ok=True)
        self._init_db()

    def _connect(self) -> sqlite3.Connection:
        """Open a new connection to the registry database.

        The caller owns the connection and must close it: using a sqlite3
        connection as a context manager only commits or rolls back the
        transaction, it does not close the connection.
        """
        return sqlite3.connect(self._db_path)

    def _init_db(self) -> None:
        """Create the tables and indexes if they do not exist yet."""
        conn = self._connect()
        try:
            with conn:
                conn.executescript("""
                    CREATE TABLE IF NOT EXISTS assets (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        teacher_id TEXT NOT NULL,
                        source_path TEXT NOT NULL,
                        filename TEXT NOT NULL,
                        doc_type TEXT NOT NULL,
                        material_type TEXT NOT NULL,
                        title TEXT NOT NULL,
                        topic_tags TEXT DEFAULT '[]',
                        grade_hint TEXT DEFAULT '',
                        slide_count INTEGER,
                        page_count INTEGER,
                        word_count INTEGER DEFAULT 0,
                        has_images INTEGER DEFAULT 0,
                        image_count INTEGER DEFAULT 0,
                        youtube_urls TEXT DEFAULT '[]',
                        external_urls TEXT DEFAULT '[]',
                        completeness TEXT DEFAULT 'unknown',
                        file_size_bytes INTEGER,
                        content_hash TEXT NOT NULL,
                        indexed_at TEXT NOT NULL,
                        UNIQUE(teacher_id, content_hash)
                    );
                    CREATE INDEX IF NOT EXISTS idx_assets_teacher
                        ON assets(teacher_id);
                    CREATE INDEX IF NOT EXISTS idx_assets_material_type
                        ON assets(teacher_id, material_type);

                    CREATE TABLE IF NOT EXISTS asset_images (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        asset_id INTEGER NOT NULL REFERENCES assets(id),
                        image_index INTEGER NOT NULL,
                        image_path TEXT NOT NULL,
                        image_format TEXT DEFAULT '',
                        width_px INTEGER,
                        height_px INTEGER,
                        alt_text TEXT DEFAULT '',
                        context_text TEXT DEFAULT '',
                        slide_number INTEGER,
                        UNIQUE(asset_id, image_index)
                    );

                    CREATE TABLE IF NOT EXISTS asset_links (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        asset_id INTEGER NOT NULL REFERENCES assets(id),
                        url TEXT NOT NULL,
                        link_type TEXT NOT NULL,
                        context_text TEXT DEFAULT '',
                        title_hint TEXT DEFAULT '',
                        UNIQUE(asset_id, url)
                    );
                """)
        finally:
            conn.close()

    @staticmethod
    def _merge_extracted_urls(
        youtube_urls: list[str],
        external_urls: list[str],
        extracted: list[ExtractedURL],
    ) -> None:
        """Append extractor-reported URLs to the matching list, in place.

        Entries whose ``link_type`` is ``'youtube'`` only ever go to
        *youtube_urls*; everything else goes to *external_urls*. URLs already
        present in their target list are skipped.
        """
        for eu in extracted:
            if eu.link_type == 'youtube':
                # A YouTube link that is already known must not fall through
                # to the external bucket.
                if eu.url not in youtube_urls:
                    youtube_urls.append(eu.url)
            elif eu.url not in external_urls:
                external_urls.append(eu.url)

    def _store_images(
        self,
        conn: sqlite3.Connection,
        asset_id: int,
        content_hash: str,
        images: list[ExtractedImage],
    ) -> None:
        """Write extracted images to the cache directory and index them.

        Each image is handled best-effort: a failure to write or insert one
        image is logged at debug level and does not stop the others.
        """
        cache_dir = self._db_path.parent.parent / "cache" / "extracted" / content_hash
        cache_dir.mkdir(parents=True, exist_ok=True)
        for idx, img in enumerate(images):
            ext = img.format.lower().replace('jpeg', 'jpg')
            img_path = cache_dir / f"{idx}.{ext}"
            try:
                img_path.write_bytes(img.image_bytes)
                conn.execute(
                    "INSERT OR IGNORE INTO asset_images "
                    "(asset_id, image_index, image_path, image_format, "
                    "width_px, height_px, alt_text, context_text, slide_number) "
                    "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
                    (
                        asset_id, idx, str(img_path), img.format,
                        img.width, img.height, img.alt_text,
                        img.context_text, img.slide_number,
                    ),
                )
            except Exception as e:
                logger.debug("Failed to save image %d: %s", idx, e)

    @staticmethod
    def _store_links(
        conn: sqlite3.Connection,
        asset_id: int,
        youtube_urls: list[str],
        external_urls: list[str],
    ) -> None:
        """Insert link rows: all YouTube URLs plus up to 20 external URLs."""
        for yt_url in youtube_urls:
            conn.execute(
                "INSERT OR IGNORE INTO asset_links "
                "(asset_id, url, link_type, context_text) VALUES (?, ?, ?, ?)",
                (asset_id, yt_url, 'youtube', ''),
            )
        for ext_url in external_urls[:20]:  # cap at 20 external URLs
            conn.execute(
                "INSERT OR IGNORE INTO asset_links "
                "(asset_id, url, link_type, context_text) VALUES (?, ?, ?, ?)",
                (asset_id, ext_url, classify_url(ext_url), ''),
            )

    def register_asset(
        self,
        teacher_id: str,
        source_path: str,
        title: str,
        doc_type: str,
        text: str,
        extraction: ExtractionResult | None = None,
    ) -> int | None:
        """Register a file as an asset.

        The asset is identified by a hash of *text*; a second registration of
        the same text for the same teacher is ignored.

        Args:
            teacher_id: Owner of the material.
            source_path: Path of the original file; its basename is stored as
                the filename and its size is recorded when it can be read.
            title: Display title of the material.
            doc_type: Document type label passed to the material classifier.
            text: Extracted text, used for hashing, word count and URL search.
            extraction: Optional rich extraction result providing slide/page
                counts, images and additional URLs.

        Returns:
            The new asset's row id, or ``None`` when the asset is a duplicate
            or when storing it failed (failures are logged at debug level).
        """
        content_hash = hashlib.sha256(text.encode("utf-8", errors="replace")).hexdigest()[:32]
        filename = Path(source_path).name

        slide_count = extraction.slide_count if extraction else None
        page_count = extraction.page_count if extraction else None
        material_type = classify_material_type(
            doc_type, text, filename,
            slide_count=slide_count,
        )
        word_count = len(text.split()) if text else 0

        # URLs found in the text; YouTube links are stored in canonical
        # watch-URL form and kept out of the external list.
        yt_ids = extract_youtube_ids(text)
        youtube_urls = [f"https://youtube.com/watch?v={vid}" for vid in yt_ids]
        external_urls = []
        for u in extract_urls(text):
            lowered = u.lower()
            if 'youtube' not in lowered and 'youtu.be' not in lowered:
                external_urls.append(u)

        # URLs reported by the extractor itself.
        if extraction:
            self._merge_extracted_urls(youtube_urls, external_urls, extraction.urls)

        image_count = len(extraction.images) if extraction else 0
        has_images = 1 if image_count > 0 else 0

        try:
            file_size = Path(source_path).stat().st_size
        except (OSError, ValueError):
            # Missing/unreadable file or an invalid path: size is unknown.
            file_size = 0

        completeness = 'complete' if material_type in ('slideshow', 'assessment', 'handout') else 'unknown'

        try:
            conn = self._connect()
            try:
                with conn:  # commit on success, roll back on error
                    cursor = conn.execute(
                        "INSERT OR IGNORE INTO assets "
                        "(teacher_id, source_path, filename, doc_type, material_type, title, "
                        "word_count, slide_count, page_count, has_images, image_count, "
                        "youtube_urls, external_urls, completeness, file_size_bytes, "
                        "content_hash, indexed_at) "
                        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                        (
                            teacher_id, source_path, filename, doc_type, material_type, title,
                            word_count, slide_count, page_count, has_images, image_count,
                            json.dumps(youtube_urls), json.dumps(external_urls),
                            completeness, file_size, content_hash,
                            datetime.now().isoformat(),
                        ),
                    )
                    if cursor.rowcount == 0:
                        return None  # duplicate
                    asset_id = cursor.lastrowid

                    if extraction and extraction.images:
                        self._store_images(conn, asset_id, content_hash, extraction.images)
                    self._store_links(conn, asset_id, youtube_urls, external_urls)

                    return asset_id
            finally:
                conn.close()
        except Exception as e:
            # Registration is best-effort: never let indexing break the caller.
            logger.debug("Asset registration failed: %s", e)
            return None

    def search_assets(
        self, teacher_id: str, query: str, top_k: int = 10,
    ) -> list[dict[str, Any]]:
        """Search a teacher's assets by keyword matching on title and filename.

        Query words of three or more characters are lower-cased and counted
        as substring hits against ``"<title> <filename>"``. Results are
        ordered by hit count (descending), then by material type.

        Args:
            teacher_id: Owner whose assets are searched.
            query: Free-text query.
            top_k: Maximum number of results; non-positive values yield ``[]``.

        Returns:
            Asset rows as dicts, with ``youtube_urls`` and ``external_urls``
            decoded from JSON into lists.
        """
        keywords = [w.lower() for w in query.split() if len(w) > 2]
        if not keywords or top_k <= 0:
            return []

        conn = self._connect()
        try:
            conn.row_factory = sqlite3.Row
            rows = conn.execute(
                "SELECT * FROM assets WHERE teacher_id = ?", (teacher_id,),
            ).fetchall()
        finally:
            conn.close()

        scored: list[tuple[float, dict]] = []
        for row in rows:
            combined = row["title"].lower() + " " + row["filename"].lower()
            score = sum(1 for kw in keywords if kw in combined)
            if score > 0:
                asset = dict(row)
                # Tolerate NULL in the JSON columns.
                asset["youtube_urls"] = json.loads(asset["youtube_urls"] or "[]")
                asset["external_urls"] = json.loads(asset["external_urls"] or "[]")
                scored.append((score, asset))

        scored.sort(key=lambda x: (-x[0], x[1].get("material_type", "")))
        return [item[1] for item in scored[:top_k]]

    def get_youtube_links(self, teacher_id: str, query: str, top_k: int = 5) -> list[dict]:
        """Search for YouTube links related to a topic.

        Looks at the top 50 assets matching *query* and collects their
        YouTube URLs, de-duplicated, in asset ranking order.

        Returns:
            Up to *top_k* dicts with keys ``url``, ``from_file`` (the asset
            title) and ``material_type``; ``[]`` for a non-positive *top_k*.
        """
        if top_k <= 0:
            return []

        assets = self.search_assets(teacher_id, query, top_k=50)
        links: list[dict] = []
        seen: set[str] = set()
        for asset in assets:
            for yt_url in asset.get("youtube_urls", []):
                if yt_url in seen:
                    continue
                seen.add(yt_url)
                links.append({
                    "url": yt_url,
                    "from_file": asset["title"],
                    "material_type": asset["material_type"],
                })
                if len(links) >= top_k:
                    return links
        return links

    def format_asset_summary(self, assets: list[dict], youtube_links: list[dict] | None = None) -> str:
        """Format a human-readable summary of found assets.

        Args:
            assets: Asset dicts as returned by :meth:`search_assets`.
            youtube_links: Optional link dicts as returned by
                :meth:`get_youtube_links`.

        Returns:
            A multi-line text block, or ``""`` when both inputs are empty.
        """
        if not assets and not youtube_links:
            return ""

        lines: list[str] = []
        if assets:
            lines.append("Teacher's Existing Materials on This Topic:")
            for a in assets:
                type_label = a["material_type"].replace("_", " ").title()
                extras: list[str] = []
                if a.get("slide_count"):
                    extras.append(f"{a['slide_count']} slides")
                if a.get("image_count"):
                    extras.append(f"{a['image_count']} images")
                yt_count = len(a.get("youtube_urls", []))
                if yt_count:
                    extras.append(f"{yt_count} YouTube links")
                extra_str = f" ({', '.join(extras)})" if extras else ""
                lines.append(f" - [{type_label}] \"{a['title']}\"{extra_str}")
                lines.append(f" File: {a['filename']}")

        if youtube_links:
            lines.append("\nYouTube Links Found in Your Files:")
            for link in youtube_links:
                lines.append(f" - {link['url']} (from \"{link['from_file']}\")")

        lines.append(
            "\nReference and build on these existing materials. "
            "If the teacher has taught this topic before, extend their work."
        )
        return "\n".join(lines)

    def stats(self, teacher_id: str) -> dict[str, int]:
        """Return asset, image and link counts for one teacher.

        Returns:
            A dict with keys ``asset_count``, ``image_count`` and
            ``link_count``.
        """
        conn = self._connect()
        try:
            total = conn.execute(
                "SELECT COUNT(*) FROM assets WHERE teacher_id = ?", (teacher_id,),
            ).fetchone()[0]
            images = conn.execute(
                "SELECT COUNT(*) FROM asset_images ai "
                "JOIN assets a ON ai.asset_id = a.id WHERE a.teacher_id = ?",
                (teacher_id,),
            ).fetchone()[0]
            links = conn.execute(
                "SELECT COUNT(*) FROM asset_links al "
                "JOIN assets a ON al.asset_id = a.id WHERE a.teacher_id = ?",
                (teacher_id,),
            ).fetchone()[0]
        finally:
            conn.close()
        return {"asset_count": total, "image_count": images, "link_count": links}
|