@bluefly/openstandardagents 0.2.7 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.devfile.yaml +1 -1
- package/.env.example +1 -1
- package/.github/AGENTS.md +245 -0
- package/.github/agents/github-issue-triage.ossa.yaml +99 -0
- package/.github/agents/github-pr-triage.ossa.yaml +137 -0
- package/.github/workflows/issue-sync-to-gitlab.yml +138 -0
- package/.github/workflows/pr-triage-to-gitlab.yml +164 -0
- package/.version.json +2 -2
- package/.wiki-config.json +24 -0
- package/CHANGELOG.md +44 -18
- package/CODEOWNERS +75 -0
- package/CONTRIBUTING.md +103 -4
- package/README.md +178 -243
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/repositories/schema.repository.d.ts +6 -1
- package/dist/repositories/schema.repository.d.ts.map +1 -1
- package/dist/repositories/schema.repository.js +49 -27
- package/dist/repositories/schema.repository.js.map +1 -1
- package/dist/services/migration.service.d.ts +4 -3
- package/dist/services/migration.service.d.ts.map +1 -1
- package/dist/services/migration.service.js +11 -10
- package/dist/services/migration.service.js.map +1 -1
- package/dist/services/release-automation/release.service.js +1 -1
- package/dist/services/release-automation/release.service.js.map +1 -1
- package/dist/services/release-automation/schemas/release.schema.js +1 -1
- package/dist/services/release-automation/webhook.service.js +3 -3
- package/dist/services/release-automation/webhook.service.js.map +1 -1
- package/dist/services/runtime/claude/claude-adapter.d.ts +1 -1
- package/dist/services/runtime/claude/claude-adapter.d.ts.map +1 -1
- package/dist/services/runtime/claude/claude-adapter.js +2 -2
- package/dist/services/runtime/claude/claude-adapter.js.map +1 -1
- package/dist/spec/v0.2.8/CHANGELOG.md +401 -0
- package/dist/spec/v0.2.8/README.md +72 -0
- package/dist/spec/v0.2.8/migrations/v0.2.3-to-v0.2.4.md +599 -0
- package/dist/spec/v0.2.8/migrations/v0.2.5-RC-to-v0.2.6.md +65 -0
- package/dist/spec/v0.2.8/migrations/v0.2.6-to-v0.2.8.md +81 -0
- package/{spec/v0.2.6-dev/ossa-0.2.5.schema.json → dist/spec/v0.2.8/ossa-0.2.8.schema.json} +1509 -52
- package/dist/spec/v0.2.8/ossa-0.2.8.yaml +581 -0
- package/dist/spec/v0.2.9/a2a-protocol.md +1337 -0
- package/dist/spec/v0.2.9/agent.md +1946 -0
- package/dist/spec/v0.2.9/capabilities/index.yaml +25 -0
- package/dist/spec/v0.2.9/capabilities/memory.yaml +251 -0
- package/dist/spec/v0.2.9/capability-schema.md +576 -0
- package/dist/spec/v0.2.9/compliance-profiles.md +533 -0
- package/dist/spec/v0.2.9/conformance-testing.md +1527 -0
- package/dist/spec/v0.2.9/gitlab-duo-integration.md +621 -0
- package/dist/spec/v0.2.9/ossa-0.2.9.schema.json +3699 -0
- package/dist/spec/v0.2.9/runtime-semantics.md +464 -0
- package/dist/spec/v0.2.9/security-model.md +1245 -0
- package/dist/spec/v0.2.9/semantic-conventions.md +347 -0
- package/dist/spec/v0.2.9/types.ts +522 -0
- package/dist/types/index.d.ts +3 -2
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/policy.d.ts +377 -0
- package/dist/types/policy.d.ts.map +1 -0
- package/dist/types/policy.js +84 -0
- package/dist/types/policy.js.map +1 -0
- package/dist/utils/index.d.ts +6 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +6 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/version.d.ts +68 -0
- package/dist/utils/version.d.ts.map +1 -0
- package/dist/utils/version.js +156 -0
- package/dist/utils/version.js.map +1 -0
- package/docs/specs/policy-dsl.md +925 -0
- package/eslint-report.json +1 -0
- package/examples/adk-integration/code-review-workflow.yml +1 -1
- package/examples/adk-integration/customer-support.yml +1 -1
- package/examples/adk-integration/data-pipeline.yml +1 -1
- package/examples/advanced/reasoning-agent.yaml +136 -0
- package/examples/advanced/workflows/hybrid-model-strategy.yaml +1 -1
- package/examples/agent-manifests/critics/critic-agent.yaml +1 -1
- package/examples/agent-manifests/governors/governor-agent.yaml +1 -1
- package/examples/agent-manifests/integrators/integrator-agent.yaml +1 -1
- package/examples/agent-manifests/judges/judge-agent.yaml +1 -1
- package/examples/agent-manifests/monitors/monitor-agent.yaml +1 -1
- package/examples/agent-manifests/orchestrators/orchestrator-agent.yaml +1 -1
- package/examples/agent-manifests/sample-compliant-agent.yaml +1 -1
- package/examples/agent-manifests/workers/worker-agent.yaml +1 -1
- package/examples/agents-md/code-agent.ossa.json +100 -0
- package/examples/agents-md/monorepo-agent.ossa.yaml +180 -0
- package/examples/anthropic/claude-assistant.ossa.json +1 -1
- package/examples/autogen/multi-agent.ossa.json +1 -1
- package/examples/claude-code/code-reviewer.ossa.yaml +78 -0
- package/examples/claude-code/ossa-validator.ossa.yaml +80 -0
- package/examples/common_npm/agent-router.ossa.yaml +1 -0
- package/examples/common_npm/agent-router.v0.2.2.ossa.yaml +1 -1
- package/examples/crewai/research-team.ossa.json +1 -1
- package/examples/cursor/code-review-agent.ossa.json +1 -1
- package/examples/drupal/gitlab-ml-recommender.ossa.yaml +1 -0
- package/examples/drupal/gitlab-ml-recommender.v0.2.2.ossa.yaml +1 -1
- package/examples/extensions/agents-md-v1.yml +175 -0
- package/examples/extensions/drupal-v1.yml +1 -1
- package/examples/extensions/kagent-v1.yml +1 -1
- package/examples/getting-started/hello-world-complete.ossa.yaml +1 -1
- package/examples/integration-patterns/agent-to-agent-orchestration.ossa.yaml +4 -4
- package/examples/kagent/compliance-validator.ossa.yaml +1 -1
- package/examples/kagent/cost-optimizer.ossa.yaml +1 -1
- package/examples/kagent/documentation-agent.ossa.yaml +1 -1
- package/examples/kagent/k8s-troubleshooter-v1.ossa.yaml +1 -0
- package/examples/kagent/k8s-troubleshooter-v1.v0.2.2.ossa.yaml +1 -1
- package/examples/kagent/k8s-troubleshooter.ossa.yaml +1 -1
- package/examples/kagent/security-scanner.ossa.yaml +1 -1
- package/examples/langchain/chain-agent.ossa.json +1 -1
- package/examples/langflow/workflow-agent.ossa.json +1 -1
- package/examples/langgraph/state-machine-agent.ossa.json +1 -1
- package/examples/llamaindex/rag-agent.ossa.json +1 -1
- package/examples/migration-guides/from-langchain-to-ossa.yaml +4 -4
- package/examples/multi-agent/README.md +74 -0
- package/examples/multi-agent/conditional-router.ossa.yaml +42 -0
- package/examples/multi-agent/parallel-execution.ossa.yaml +54 -0
- package/examples/multi-agent/sequential-pipeline.ossa.yaml +45 -0
- package/examples/openai/basic-agent.ossa.yaml +1 -1
- package/examples/openai/multi-tool-agent.ossa.json +1 -1
- package/examples/openai/swarm-agent.ossa.json +1 -1
- package/examples/production/document-analyzer-openai.yml +1 -1
- package/examples/quickstart/support-agent.ossa.yaml +1 -1
- package/examples/spec-examples/audit-agent.yml +1 -1
- package/examples/spec-examples/chat-agent.yml +1 -1
- package/examples/spec-examples/compliance-agent.yml +1 -1
- package/examples/spec-examples/monitoring-agent.yml +1 -1
- package/examples/spec-examples/workflow-agent.yml +1 -1
- package/examples/templates/ossa-compliance.yaml +1 -1
- package/examples/vercel/edge-agent.ossa.json +1 -1
- package/gl-code-quality-report.json +62 -0
- package/llms-ctx-full.txt +39 -0
- package/llms-ctx.txt +39 -0
- package/llms.txt +47 -0
- package/package.json +6 -3
- package/scripts/README.md +25 -0
- package/scripts/compliance-audit.ts +796 -0
- package/scripts/eslint-to-codequality.cjs +34 -0
- package/scripts/generate-agents-catalog.ts +2 -1
- package/scripts/generate-api-docs.ts +2 -1
- package/scripts/generate-examples-docs.ts +2 -1
- package/scripts/generate-llms-ctx.sh +17 -0
- package/scripts/generate-schema-docs.ts +31 -10
- package/scripts/sync-version.js +4 -12
- package/scripts/validate-schema.ts +2 -1
- package/spec/v0.2.8/CHANGELOG.md +401 -0
- package/spec/v0.2.8/README.md +72 -0
- package/spec/v0.2.8/migrations/v0.2.3-to-v0.2.4.md +599 -0
- package/spec/v0.2.8/migrations/v0.2.5-RC-to-v0.2.6.md +65 -0
- package/spec/v0.2.8/migrations/v0.2.6-to-v0.2.8.md +81 -0
- package/spec/{v0.2.6-dev/ossa-0.2.6-dev.schema.json → v0.2.8/ossa-0.2.8.schema.json} +1509 -52
- package/spec/v0.2.8/ossa-0.2.8.yaml +581 -0
- package/spec/v0.2.9/a2a-protocol.md +1337 -0
- package/spec/v0.2.9/agent.md +1946 -0
- package/spec/v0.2.9/capabilities/index.yaml +25 -0
- package/spec/v0.2.9/capabilities/memory.yaml +251 -0
- package/spec/v0.2.9/capability-schema.md +576 -0
- package/spec/v0.2.9/compliance-profiles.md +533 -0
- package/spec/v0.2.9/conformance-testing.md +1527 -0
- package/spec/v0.2.9/gitlab-duo-integration.md +621 -0
- package/spec/v0.2.9/ossa-0.2.9.schema.json +3699 -0
- package/spec/v0.2.9/runtime-semantics.md +464 -0
- package/spec/v0.2.9/security-model.md +1245 -0
- package/spec/v0.2.9/semantic-conventions.md +347 -0
- package/spec/v0.2.9/types.ts +522 -0
- package/test-results/junit.xml +337 -0
- package/.github/workflows/pr-comment.yml +0 -33
- package/bin/validate-ossa-0.2.5-RC.ts +0 -244
- package/dist/spec/v0.2.6-dev/ossa-0.2.5.schema.json +0 -1696
- package/dist/spec/v0.2.6-dev/ossa-0.2.6-dev.schema.json +0 -1696
- package/scripts/lib/exec.ts +0 -37
- package/scripts/lib/file-ops.ts +0 -58
- package/scripts/lib/version.ts +0 -83
- package/website/.lighthouserc.ts +0 -24
- package/website/.prettierrc +0 -10
- package/website/DESIGN_SYSTEM_IMPLEMENTATION.md +0 -445
- package/website/Dockerfile +0 -30
- package/website/app/about/page.tsx +0 -304
- package/website/app/blog/[slug]/page.tsx +0 -208
- package/website/app/blog/page.tsx +0 -249
- package/website/app/design-guide/page.tsx +0 -511
- package/website/app/docs/[[...slug]]/page.tsx +0 -847
- package/website/app/docs/core-concepts/project-structure/page.tsx +0 -349
- package/website/app/ecosystem/page.tsx +0 -410
- package/website/app/examples/page.tsx +0 -133
- package/website/app/globals.scss +0 -370
- package/website/app/layout.tsx +0 -106
- package/website/app/license/page.tsx +0 -183
- package/website/app/not-found.tsx +0 -18
- package/website/app/page.tsx +0 -686
- package/website/app/page.tsx.bak +0 -679
- package/website/app/page.tsx.bak2 +0 -649
- package/website/app/playground/page.tsx +0 -487
- package/website/app/robots.ts +0 -19
- package/website/app/rss.xml/route.ts +0 -74
- package/website/app/schema/page.tsx +0 -1001
- package/website/app/sitemap.ts +0 -56
- package/website/app/specification/page.tsx +0 -287
- package/website/components/InstallCommand.tsx +0 -96
- package/website/components/Logo.tsx +0 -97
- package/website/components/StructuredData.tsx +0 -65
- package/website/components/docs/DocsSearch.tsx +0 -104
- package/website/components/docs/DocsSidebar.tsx +0 -155
- package/website/components/docs/MarkdownContent.tsx +0 -401
- package/website/components/docs/VersionSelector.tsx +0 -105
- package/website/components/examples/ExamplesViewer.tsx +0 -293
- package/website/components/layout/Footer.tsx +0 -116
- package/website/components/layout/Header.tsx +0 -172
- package/website/components/schema/SchemaComponentsAccordion.tsx +0 -84
- package/website/components/schema/SchemaExplorer.tsx +0 -213
- package/website/components/ui/Badge.tsx +0 -82
- package/website/components/ui/Button.tsx +0 -116
- package/website/components/ui/Card.tsx +0 -167
- package/website/components/ui/Checkbox.tsx +0 -141
- package/website/components/ui/Input.tsx +0 -169
- package/website/components/ui/Radio.tsx +0 -141
- package/website/components/ui/Select.tsx +0 -182
- package/website/components/ui/Tag.tsx +0 -158
- package/website/components/ui/Textarea.tsx +0 -195
- package/website/components/ui/index.ts +0 -11
- package/website/content/blog/OpenAPI-AI-Agents-Standard.md +0 -285
- package/website/content/blog/Why-Formal-Standards-Matter-Now.md +0 -198
- package/website/content/blog/gitlab-kubernetes-agent-ecosystem.md +0 -286
- package/website/content/blog/introducing-ossa-framework.md +0 -328
- package/website/content/blog/ossa-production-results.md +0 -279
- package/website/content/blog/welcome-to-ossa.md +0 -43
- package/website/content/blog/why-ai-agents-need-open-standard.md +0 -98
- package/website/content/docs/00-home.md +0 -153
- package/website/content/docs/adapters/openai-adapter.md +0 -693
- package/website/content/docs/agents/catalog.md +0 -28
- package/website/content/docs/aiflow-framework-integration-with-ossa.md +0 -107
- package/website/content/docs/api-reference/index.md +0 -38
- package/website/content/docs/api-reference/ossa-core-api.md +0 -634
- package/website/content/docs/api-reference/ossa-registry-api.md +0 -515
- package/website/content/docs/api-reference/unified-agent-gateway.md +0 -599
- package/website/content/docs/architecture/execution-flow.md +0 -335
- package/website/content/docs/architecture/multi-agent-systems.md +0 -737
- package/website/content/docs/architecture/overview.md +0 -121
- package/website/content/docs/architecture/stack-integration.md +0 -461
- package/website/content/docs/changelog.md +0 -246
- package/website/content/docs/cli-reference/index.md +0 -111
- package/website/content/docs/cli-reference/ossa-agents.md +0 -70
- package/website/content/docs/cli-reference/ossa-export.md +0 -56
- package/website/content/docs/cli-reference/ossa-generate.md +0 -66
- package/website/content/docs/cli-reference/ossa-gitlab-agent.md +0 -57
- package/website/content/docs/cli-reference/ossa-import.md +0 -56
- package/website/content/docs/cli-reference/ossa-init.md +0 -57
- package/website/content/docs/cli-reference/ossa-migrate.md +0 -62
- package/website/content/docs/cli-reference/ossa-run.md +0 -66
- package/website/content/docs/cli-reference/ossa-schema.md +0 -57
- package/website/content/docs/cli-reference/ossa-setup.md +0 -57
- package/website/content/docs/cli-reference/ossa-validate.md +0 -66
- package/website/content/docs/configuration/index.md +0 -97
- package/website/content/docs/contributing.md +0 -599
- package/website/content/docs/deployment/github-mirroring.md +0 -924
- package/website/content/docs/documentation.md +0 -100
- package/website/content/docs/ecosystem/framework-support.md +0 -1361
- package/website/content/docs/ecosystem/overview.md +0 -366
- package/website/content/docs/errors/index.md +0 -10
- package/website/content/docs/examples/aiflow-framework-integration-with-ossa.md +0 -107
- package/website/content/docs/examples/catalog.md +0 -300
- package/website/content/docs/for-audiences/students-researchers.md +0 -122
- package/website/content/docs/getting-started/index.md +0 -92
- package/website/content/docs/getting-started/installation.md +0 -155
- package/website/content/docs/getting-started/running-agents.md +0 -309
- package/website/content/docs/getting-started.md +0 -91
- package/website/content/docs/integrations/aiflow.md +0 -104
- package/website/content/docs/integrations/drupal.md +0 -105
- package/website/content/docs/migration-guides/agent-schema-comparison.md +0 -232
- package/website/content/docs/migration-guides/anthropic-mcp-to-ossa.md +0 -1750
- package/website/content/docs/migration-guides/crewai-to-ossa.md +0 -274
- package/website/content/docs/migration-guides/drupal-eca-to-ossa.md +0 -2017
- package/website/content/docs/migration-guides/general-agent-schema.yml +0 -247
- package/website/content/docs/migration-guides/index.md +0 -133
- package/website/content/docs/migration-guides/langchain-to-ossa.md +0 -1714
- package/website/content/docs/migration-guides/langflow-to-ossa.md +0 -2075
- package/website/content/docs/migration-guides/migration-manifest.json +0 -64
- package/website/content/docs/migration-guides/openai-to-ossa.md +0 -1202
- package/website/content/docs/openapi-extensions/examples.md +0 -550
- package/website/content/docs/openapi-extensions/index.md +0 -551
- package/website/content/docs/openapi-extensions/operation-extensions.md +0 -457
- package/website/content/docs/openapi-extensions/root-extensions.md +0 -410
- package/website/content/docs/ossa-compliant-badge.md +0 -251
- package/website/content/docs/pre-release/index.md +0 -175
- package/website/content/docs/quick-reference.md +0 -17
- package/website/content/docs/readme.md +0 -35
- package/website/content/docs/releases/v0.2.6.md +0 -99
- package/website/content/docs/schema-reference/agent-capabilities.md +0 -50
- package/website/content/docs/schema-reference/agent-id.md +0 -52
- package/website/content/docs/schema-reference/agent-name.md +0 -50
- package/website/content/docs/schema-reference/agent-role.md +0 -54
- package/website/content/docs/schema-reference/agent-spec.md +0 -406
- package/website/content/docs/schema-reference/agent-version.md +0 -50
- package/website/content/docs/schema-reference/autonomy.md +0 -568
- package/website/content/docs/schema-reference/constraints.md +0 -543
- package/website/content/docs/schema-reference/index.md +0 -45
- package/website/content/docs/schema-reference/llm-config.md +0 -445
- package/website/content/docs/schema-reference/observability.md +0 -654
- package/website/content/docs/schema-reference/ossa-manifest.md +0 -309
- package/website/content/docs/schema-reference/taxonomy.md +0 -509
- package/website/content/docs/schema-reference/tools.md +0 -628
- package/website/content/docs/templates/blog-post.md +0 -43
- package/website/content/docs/types-reference/index.md +0 -105
- package/website/content/docs/use-cases/00-index.md +0 -395
- package/website/content/docs/use-cases/cicd-code-review.md +0 -1236
- package/website/content/docs/use-cases/customer-support.md +0 -1234
- package/website/content/docs/use-cases/enterprise-compliance.md +0 -1208
- package/website/content/docs/use-cases/research-multi-agent.md +0 -1161
- package/website/content/docs/versioning.md +0 -288
- package/website/dev.sh +0 -53
- package/website/docker-compose.dev.yml +0 -36
- package/website/lib/version.ts +0 -35
- package/website/lib/versions.json +0 -103
- package/website/next.config.ts +0 -18
- package/website/nginx.conf +0 -32
- package/website/package-lock.json +0 -9679
- package/website/package.json +0 -59
- package/website/postcss.config.mjs +0 -9
- package/website/scripts/fetch-versions.js +0 -166
- package/website/scripts/generate-examples-index.js +0 -163
- package/website/scripts/merge-docs-to-wiki.ts +0 -207
- package/website/scripts/sync-version.js +0 -72
- package/website/scripts/sync-wiki.ts +0 -322
- package/website/scripts/upload-wiki.ts +0 -199
- package/website/styles/_spacing.scss +0 -453
- package/website/styles/_tokens.scss +0 -245
- package/website/styles/_typography.scss +0 -361
- package/website/styles/_variables.scss +0 -287
- package/website/tailwind.config.ts +0 -170
|
@@ -1,1161 +0,0 @@
|
|
|
1
|
-
# Research Multi-Agent Debate System
|
|
2
|
-
|
|
3
|
-
## Problem Statement
|
|
4
|
-
|
|
5
|
-
Academic and corporate research teams face challenges in literature review and peer review:
|
|
6
|
-
- **Confirmation bias**: Single researchers miss contradictory evidence
|
|
7
|
-
- **Shallow analysis**: Time pressure leads to cursory paper reviews
|
|
8
|
-
- **Limited perspectives**: Reviewers bring only their domain expertise
|
|
9
|
-
- **Scalability**: Cannot review hundreds of papers in emerging fields
|
|
10
|
-
- **Reproducibility**: Manual reviews aren't systematic or reproducible
|
|
11
|
-
- **Quality variance**: Different reviewers apply different standards
|
|
12
|
-
|
|
13
|
-
**Solution**: A multi-agent OSSA system where specialized AI agents debate research papers from different perspectives (methodology, novelty, reproducibility, impact), forcing rigorous analysis through adversarial collaboration and structured argumentation.
|
|
14
|
-
|
|
15
|
-
## Architecture Overview
|
|
16
|
-
|
|
17
|
-
```mermaid
|
|
18
|
-
graph TB
|
|
19
|
-
subgraph "Input"
|
|
20
|
-
Paper[Research Paper PDF]
|
|
21
|
-
Metadata[Paper Metadata]
|
|
22
|
-
end
|
|
23
|
-
|
|
24
|
-
subgraph "Orchestrator Agent"
|
|
25
|
-
Coord[Debate Coordinator]
|
|
26
|
-
Moderator[Debate Moderator]
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
subgraph "Specialist Agents"
|
|
30
|
-
Method[Methodology Critic]
|
|
31
|
-
Novel[Novelty Assessor]
|
|
32
|
-
Repro[Reproducibility Checker]
|
|
33
|
-
Impact[Impact Evaluator]
|
|
34
|
-
Ethics[Ethics Reviewer]
|
|
35
|
-
end
|
|
36
|
-
|
|
37
|
-
subgraph "Debate Process"
|
|
38
|
-
Round1[Round 1: Initial Positions]
|
|
39
|
-
Round2[Round 2: Rebuttals]
|
|
40
|
-
Round3[Round 3: Evidence]
|
|
41
|
-
Round4[Round 4: Consensus]
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
subgraph "Knowledge Base"
|
|
45
|
-
ArXiv[ArXiv Index]
|
|
46
|
-
PubMed[PubMed Index]
|
|
47
|
-
Citations[Citation Graph]
|
|
48
|
-
Code[Code Repositories]
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
subgraph "Outputs"
|
|
52
|
-
Summary[Executive Summary]
|
|
53
|
-
Scores[Multi-dimensional Scores]
|
|
54
|
-
Report[Detailed Review Report]
|
|
55
|
-
Recommend[Accept/Revise/Reject]
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
Paper --> Coord
|
|
59
|
-
Metadata --> Coord
|
|
60
|
-
|
|
61
|
-
Coord --> Method
|
|
62
|
-
Coord --> Novel
|
|
63
|
-
Coord --> Repro
|
|
64
|
-
Coord --> Impact
|
|
65
|
-
Coord --> Ethics
|
|
66
|
-
|
|
67
|
-
Method --> Round1
|
|
68
|
-
Novel --> Round1
|
|
69
|
-
Repro --> Round1
|
|
70
|
-
Impact --> Round1
|
|
71
|
-
Ethics --> Round1
|
|
72
|
-
|
|
73
|
-
Round1 --> Moderator
|
|
74
|
-
Moderator --> Round2
|
|
75
|
-
Round2 --> Moderator
|
|
76
|
-
Moderator --> Round3
|
|
77
|
-
Round3 --> Moderator
|
|
78
|
-
Moderator --> Round4
|
|
79
|
-
|
|
80
|
-
Method -.->|Search| ArXiv
|
|
81
|
-
Novel -.->|Search| PubMed
|
|
82
|
-
Repro -.->|Check| Code
|
|
83
|
-
Impact -.->|Analyze| Citations
|
|
84
|
-
|
|
85
|
-
Round4 --> Summary
|
|
86
|
-
Round4 --> Scores
|
|
87
|
-
Round4 --> Report
|
|
88
|
-
Round4 --> Recommend
|
|
89
|
-
|
|
90
|
-
style Coord fill:#e1f5ff
|
|
91
|
-
style Moderator fill:#ffe1f5
|
|
92
|
-
style Round4 fill:#e1ffe1
|
|
93
|
-
style Recommend fill:#ffe1e1
|
|
94
|
-
```
|
|
95
|
-
|
|
96
|
-
## OSSA Manifests
|
|
97
|
-
|
|
98
|
-
### Orchestrator Agent
|
|
99
|
-
|
|
100
|
-
```yaml
|
|
101
|
-
apiVersion: ossa/v0.2.x
|
|
102
|
-
kind: Agent
|
|
103
|
-
metadata:
|
|
104
|
-
name: research-debate-orchestrator
|
|
105
|
-
namespace: research
|
|
106
|
-
labels:
|
|
107
|
-
app: research-review
|
|
108
|
-
tier: orchestration
|
|
109
|
-
role: coordinator
|
|
110
|
-
annotations:
|
|
111
|
-
research.ossa.io/debate-rounds: "4"
|
|
112
|
-
research.ossa.io/min-agents: "3"
|
|
113
|
-
research.ossa.io/max-debate-duration: "3600"
|
|
114
|
-
|
|
115
|
-
spec:
|
|
116
|
-
type: orchestrator
|
|
117
|
-
description: |
|
|
118
|
-
Orchestrates multi-agent debate system for research paper review.
|
|
119
|
-
Coordinates specialist agents, moderates debate rounds, and
|
|
120
|
-
synthesizes consensus recommendations.
|
|
121
|
-
|
|
122
|
-
runtime:
|
|
123
|
-
type: docker
|
|
124
|
-
image: registry.example.com/agents/research-orchestrator:1.0.0
|
|
125
|
-
command: ["/app/orchestrator"]
|
|
126
|
-
resources:
|
|
127
|
-
limits:
|
|
128
|
-
cpu: "2000m"
|
|
129
|
-
memory: "4Gi"
|
|
130
|
-
requests:
|
|
131
|
-
cpu: "500m"
|
|
132
|
-
memory: "1Gi"
|
|
133
|
-
|
|
134
|
-
capabilities:
|
|
135
|
-
- name: orchestrate_review
|
|
136
|
-
description: |
|
|
137
|
-
Coordinate multi-agent debate for comprehensive paper review.
|
|
138
|
-
input_schema:
|
|
139
|
-
type: object
|
|
140
|
-
required: [paper_id, paper_url, title]
|
|
141
|
-
properties:
|
|
142
|
-
paper_id:
|
|
143
|
-
type: string
|
|
144
|
-
description: "Unique paper identifier (DOI, arXiv ID, etc.)"
|
|
145
|
-
paper_url:
|
|
146
|
-
type: string
|
|
147
|
-
format: uri
|
|
148
|
-
description: "URL to paper PDF"
|
|
149
|
-
title:
|
|
150
|
-
type: string
|
|
151
|
-
abstract:
|
|
152
|
-
type: string
|
|
153
|
-
authors:
|
|
154
|
-
type: array
|
|
155
|
-
items: {type: string}
|
|
156
|
-
venue:
|
|
157
|
-
type: string
|
|
158
|
-
year:
|
|
159
|
-
type: integer
|
|
160
|
-
field:
|
|
161
|
-
type: string
|
|
162
|
-
enum: [cs, bio, physics, medicine, social, interdisciplinary]
|
|
163
|
-
debate_config:
|
|
164
|
-
type: object
|
|
165
|
-
properties:
|
|
166
|
-
rounds:
|
|
167
|
-
type: integer
|
|
168
|
-
default: 4
|
|
169
|
-
minimum: 2
|
|
170
|
-
maximum: 10
|
|
171
|
-
time_limit_per_round:
|
|
172
|
-
type: integer
|
|
173
|
-
default: 300
|
|
174
|
-
require_consensus:
|
|
175
|
-
type: boolean
|
|
176
|
-
default: false
|
|
177
|
-
output_schema:
|
|
178
|
-
type: object
|
|
179
|
-
required: [review_id, recommendation, summary, scores]
|
|
180
|
-
properties:
|
|
181
|
-
review_id:
|
|
182
|
-
type: string
|
|
183
|
-
format: uuid
|
|
184
|
-
recommendation:
|
|
185
|
-
type: string
|
|
186
|
-
enum: [strong_accept, accept, weak_accept, borderline, weak_reject, reject, strong_reject]
|
|
187
|
-
confidence:
|
|
188
|
-
type: number
|
|
189
|
-
minimum: 0
|
|
190
|
-
maximum: 1
|
|
191
|
-
summary:
|
|
192
|
-
type: object
|
|
193
|
-
required: [strengths, weaknesses, key_findings]
|
|
194
|
-
properties:
|
|
195
|
-
strengths:
|
|
196
|
-
type: array
|
|
197
|
-
items: {type: string}
|
|
198
|
-
weaknesses:
|
|
199
|
-
type: array
|
|
200
|
-
items: {type: string}
|
|
201
|
-
key_findings:
|
|
202
|
-
type: array
|
|
203
|
-
items: {type: string}
|
|
204
|
-
controversial_points:
|
|
205
|
-
type: array
|
|
206
|
-
items: {type: string}
|
|
207
|
-
scores:
|
|
208
|
-
type: object
|
|
209
|
-
required: [methodology, novelty, reproducibility, impact, ethics]
|
|
210
|
-
properties:
|
|
211
|
-
methodology:
|
|
212
|
-
type: object
|
|
213
|
-
properties:
|
|
214
|
-
score: {type: number, minimum: 1, maximum: 10}
|
|
215
|
-
reasoning: {type: string}
|
|
216
|
-
novelty:
|
|
217
|
-
type: object
|
|
218
|
-
properties:
|
|
219
|
-
score: {type: number, minimum: 1, maximum: 10}
|
|
220
|
-
reasoning: {type: string}
|
|
221
|
-
reproducibility:
|
|
222
|
-
type: object
|
|
223
|
-
properties:
|
|
224
|
-
score: {type: number, minimum: 1, maximum: 10}
|
|
225
|
-
reasoning: {type: string}
|
|
226
|
-
impact:
|
|
227
|
-
type: object
|
|
228
|
-
properties:
|
|
229
|
-
score: {type: number, minimum: 1, maximum: 10}
|
|
230
|
-
reasoning: {type: string}
|
|
231
|
-
ethics:
|
|
232
|
-
type: object
|
|
233
|
-
properties:
|
|
234
|
-
score: {type: number, minimum: 1, maximum: 10}
|
|
235
|
-
reasoning: {type: string}
|
|
236
|
-
overall:
|
|
237
|
-
type: number
|
|
238
|
-
minimum: 1
|
|
239
|
-
maximum: 10
|
|
240
|
-
debate_transcript:
|
|
241
|
-
type: array
|
|
242
|
-
items:
|
|
243
|
-
type: object
|
|
244
|
-
properties:
|
|
245
|
-
round: {type: integer}
|
|
246
|
-
agent: {type: string}
|
|
247
|
-
position: {type: string}
|
|
248
|
-
arguments: {type: array, items: {type: string}}
|
|
249
|
-
evidence: {type: array, items: {type: string}}
|
|
250
|
-
timestamp: {type: string, format: date-time}
|
|
251
|
-
related_work:
|
|
252
|
-
type: array
|
|
253
|
-
items:
|
|
254
|
-
type: object
|
|
255
|
-
properties:
|
|
256
|
-
title: {type: string}
|
|
257
|
-
authors: {type: array, items: {type: string}}
|
|
258
|
-
year: {type: integer}
|
|
259
|
-
relevance: {type: number}
|
|
260
|
-
citation_relation: {type: string}
|
|
261
|
-
timestamp:
|
|
262
|
-
type: string
|
|
263
|
-
format: date-time
|
|
264
|
-
|
|
265
|
-
policies:
|
|
266
|
-
debate:
|
|
267
|
-
min_rounds: 2
|
|
268
|
-
max_rounds: 10
|
|
269
|
-
require_consensus_threshold: 0.70
|
|
270
|
-
allow_abstention: false
|
|
271
|
-
time_limit_per_round_seconds: 300
|
|
272
|
-
max_total_duration_seconds: 3600
|
|
273
|
-
|
|
274
|
-
agent_selection:
|
|
275
|
-
required_specialists:
|
|
276
|
-
- methodology_critic
|
|
277
|
-
- novelty_assessor
|
|
278
|
-
- reproducibility_checker
|
|
279
|
-
optional_specialists:
|
|
280
|
-
- impact_evaluator
|
|
281
|
-
- ethics_reviewer
|
|
282
|
-
- domain_expert
|
|
283
|
-
min_agents: 3
|
|
284
|
-
max_agents: 7
|
|
285
|
-
|
|
286
|
-
consensus:
|
|
287
|
-
agreement_threshold: 0.70
|
|
288
|
-
max_disagreement_rounds: 3
|
|
289
|
-
tie_breaking: moderator_decision
|
|
290
|
-
|
|
291
|
-
integration:
|
|
292
|
-
protocol: agent-to-agent
|
|
293
|
-
api_version: v1
|
|
294
|
-
|
|
295
|
-
agent_registry:
|
|
296
|
-
service_name: ossa-agent-registry
|
|
297
|
-
namespace: research
|
|
298
|
-
discovery: dns
|
|
299
|
-
|
|
300
|
-
messaging:
|
|
301
|
-
protocol: grpc
|
|
302
|
-
timeout_seconds: 30
|
|
303
|
-
retry_count: 3
|
|
304
|
-
|
|
305
|
-
monitoring:
|
|
306
|
-
metrics:
|
|
307
|
-
custom_metrics:
|
|
308
|
-
- debates_completed_total
|
|
309
|
-
- average_debate_duration_seconds
|
|
310
|
-
- consensus_rate
|
|
311
|
-
- average_agreement_score
|
|
312
|
-
- papers_reviewed_total
|
|
313
|
-
|
|
314
|
-
metadata:
|
|
315
|
-
version: 1.0.0
|
|
316
|
-
author:
|
|
317
|
-
name: Research Team
|
|
318
|
-
email: research@example.com
|
|
319
|
-
license: MIT
|
|
320
|
-
```
|
|
321
|
-
|
|
322
|
-
### Methodology Critic Agent
|
|
323
|
-
|
|
324
|
-
```yaml
|
|
325
|
-
apiVersion: ossa/v0.2.x
|
|
326
|
-
kind: Agent
|
|
327
|
-
metadata:
|
|
328
|
-
name: methodology-critic
|
|
329
|
-
namespace: research
|
|
330
|
-
labels:
|
|
331
|
-
app: research-review
|
|
332
|
-
tier: specialist
|
|
333
|
-
role: critic
|
|
334
|
-
|
|
335
|
-
spec:
|
|
336
|
-
type: worker
|
|
337
|
-
description: |
|
|
338
|
-
Specialized agent that critically evaluates research methodology,
|
|
339
|
-
experimental design, statistical rigor, and scientific validity.
|
|
340
|
-
|
|
341
|
-
capabilities:
|
|
342
|
-
- name: critique_methodology
|
|
343
|
-
description: |
|
|
344
|
-
Analyze research methodology for rigor, validity, and soundness.
|
|
345
|
-
input_schema:
|
|
346
|
-
type: object
|
|
347
|
-
required: [paper_content, methodology_section]
|
|
348
|
-
properties:
|
|
349
|
-
paper_content:
|
|
350
|
-
type: string
|
|
351
|
-
methodology_section:
|
|
352
|
-
type: string
|
|
353
|
-
field:
|
|
354
|
-
type: string
|
|
355
|
-
output_schema:
|
|
356
|
-
type: object
|
|
357
|
-
properties:
|
|
358
|
-
score:
|
|
359
|
-
type: number
|
|
360
|
-
minimum: 1
|
|
361
|
-
maximum: 10
|
|
362
|
-
strengths:
|
|
363
|
-
type: array
|
|
364
|
-
items: {type: string}
|
|
365
|
-
weaknesses:
|
|
366
|
-
type: array
|
|
367
|
-
items: {type: string}
|
|
368
|
-
concerns:
|
|
369
|
-
type: array
|
|
370
|
-
items:
|
|
371
|
-
type: object
|
|
372
|
-
properties:
|
|
373
|
-
concern: {type: string}
|
|
374
|
-
severity: {type: string, enum: [critical, major, minor]}
|
|
375
|
-
suggestion: {type: string}
|
|
376
|
-
statistical_validity:
|
|
377
|
-
type: object
|
|
378
|
-
properties:
|
|
379
|
-
appropriate_tests: {type: boolean}
|
|
380
|
-
sample_size_adequate: {type: boolean}
|
|
381
|
-
assumptions_met: {type: boolean}
|
|
382
|
-
corrections_applied: {type: boolean}
|
|
383
|
-
```
|
|
384
|
-
|
|
385
|
-
### Novelty Assessor Agent
|
|
386
|
-
|
|
387
|
-
```yaml
|
|
388
|
-
apiVersion: ossa/v0.2.x
|
|
389
|
-
kind: Agent
|
|
390
|
-
metadata:
|
|
391
|
-
name: novelty-assessor
|
|
392
|
-
namespace: research
|
|
393
|
-
labels:
|
|
394
|
-
app: research-review
|
|
395
|
-
tier: specialist
|
|
396
|
-
role: assessor
|
|
397
|
-
|
|
398
|
-
spec:
|
|
399
|
-
type: worker
|
|
400
|
-
description: |
|
|
401
|
-
Assesses research novelty by comparing against existing literature,
|
|
402
|
-
identifying incremental vs. breakthrough contributions.
|
|
403
|
-
|
|
404
|
-
capabilities:
|
|
405
|
-
- name: assess_novelty
|
|
406
|
-
description: |
|
|
407
|
-
Evaluate novelty and originality of research contribution.
|
|
408
|
-
input_schema:
|
|
409
|
-
type: object
|
|
410
|
-
required: [paper_content, related_work]
|
|
411
|
-
properties:
|
|
412
|
-
paper_content:
|
|
413
|
-
type: string
|
|
414
|
-
related_work:
|
|
415
|
-
type: array
|
|
416
|
-
items:
|
|
417
|
-
type: object
|
|
418
|
-
output_schema:
|
|
419
|
-
type: object
|
|
420
|
-
properties:
|
|
421
|
-
score:
|
|
422
|
-
type: number
|
|
423
|
-
minimum: 1
|
|
424
|
-
maximum: 10
|
|
425
|
-
novelty_type:
|
|
426
|
-
type: string
|
|
427
|
-
enum: [breakthrough, significant, incremental, derivative]
|
|
428
|
-
unique_contributions:
|
|
429
|
-
type: array
|
|
430
|
-
items: {type: string}
|
|
431
|
-
overlaps_with_existing:
|
|
432
|
-
type: array
|
|
433
|
-
items:
|
|
434
|
-
type: object
|
|
435
|
-
properties:
|
|
436
|
-
paper: {type: string}
|
|
437
|
-
overlap_description: {type: string}
|
|
438
|
-
degree: {type: string, enum: [high, medium, low]}
|
|
439
|
-
```
|
|
440
|
-
|
|
441
|
-
### Reproducibility Checker Agent
|
|
442
|
-
|
|
443
|
-
```yaml
|
|
444
|
-
apiVersion: ossa/v0.2.x
|
|
445
|
-
kind: Agent
|
|
446
|
-
metadata:
|
|
447
|
-
name: reproducibility-checker
|
|
448
|
-
namespace: research
|
|
449
|
-
labels:
|
|
450
|
-
app: research-review
|
|
451
|
-
tier: specialist
|
|
452
|
-
role: checker
|
|
453
|
-
|
|
454
|
-
spec:
|
|
455
|
-
type: worker
|
|
456
|
-
description: |
|
|
457
|
-
Evaluates reproducibility by checking code availability,
|
|
458
|
-
data sharing, experimental details, and documentation quality.
|
|
459
|
-
|
|
460
|
-
capabilities:
|
|
461
|
-
- name: check_reproducibility
|
|
462
|
-
description: |
|
|
463
|
-
Assess reproducibility of research findings.
|
|
464
|
-
input_schema:
|
|
465
|
-
type: object
|
|
466
|
-
required: [paper_content]
|
|
467
|
-
properties:
|
|
468
|
-
paper_content:
|
|
469
|
-
type: string
|
|
470
|
-
code_repository:
|
|
471
|
-
type: string
|
|
472
|
-
format: uri
|
|
473
|
-
data_availability:
|
|
474
|
-
type: string
|
|
475
|
-
output_schema:
|
|
476
|
-
type: object
|
|
477
|
-
properties:
|
|
478
|
-
score:
|
|
479
|
-
type: number
|
|
480
|
-
minimum: 1
|
|
481
|
-
maximum: 10
|
|
482
|
-
code_available:
|
|
483
|
-
type: boolean
|
|
484
|
-
data_available:
|
|
485
|
-
type: boolean
|
|
486
|
-
sufficient_details:
|
|
487
|
-
type: boolean
|
|
488
|
-
dependencies_specified:
|
|
489
|
-
type: boolean
|
|
490
|
-
environment_documented:
|
|
491
|
-
type: boolean
|
|
492
|
-
reproducibility_barriers:
|
|
493
|
-
type: array
|
|
494
|
-
items:
|
|
495
|
-
type: object
|
|
496
|
-
properties:
|
|
497
|
-
barrier: {type: string}
|
|
498
|
-
severity: {type: string}
|
|
499
|
-
mitigation: {type: string}
|
|
500
|
-
```
|
|
501
|
-
|
|
502
|
-
## Implementation (Python)
|
|
503
|
-
|
|
504
|
-
```python
|
|
505
|
-
"""
|
|
506
|
-
Research Multi-Agent Debate System
|
|
507
|
-
|
|
508
|
-
Multi-agent system for rigorous research paper review through
|
|
509
|
-
adversarial collaboration and structured debate.
|
|
510
|
-
"""
|
|
511
|
-
|
|
512
|
-
import asyncio
|
|
513
|
-
from dataclasses import dataclass, field
|
|
514
|
-
from typing import List, Dict, Optional, Any
|
|
515
|
-
from enum import Enum
|
|
516
|
-
import anthropic
|
|
517
|
-
import arxiv
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
class AgentRole(Enum):
    """Specialist perspectives that can take part in a paper review debate.

    The string value of each member is what gets printed in debate logs and
    used to label findings in the review summary.
    """

    # Roles that have a concrete agent class in this example.
    METHODOLOGY_CRITIC = "methodology_critic"
    NOVELTY_ASSESSOR = "novelty_assessor"
    REPRODUCIBILITY_CHECKER = "reproducibility_checker"

    # NOTE(review): no agent class for these two roles is visible in this
    # part of the file -- presumably reserved for future specialists.
    IMPACT_EVALUATOR = "impact_evaluator"
    ETHICS_REVIEWER = "ethics_reviewer"
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
class Recommendation(Enum):
    """Seven-step editorial verdict, listed from most to least favourable."""

    # Accept side
    STRONG_ACCEPT = "strong_accept"
    ACCEPT = "accept"
    WEAK_ACCEPT = "weak_accept"

    # Undecided
    BORDERLINE = "borderline"

    # Reject side
    WEAK_REJECT = "weak_reject"
    REJECT = "reject"
    STRONG_REJECT = "strong_reject"
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
@dataclass
class DebateArgument:
    """One contribution made by one agent in one debate round."""

    # Role of the agent that made this contribution.
    agent: AgentRole
    # Debate round the contribution belongs to (the orchestrator uses 1 for
    # initial positions and 2 for rebuttals).
    round_number: int
    # Short statement of the agent's stance.
    position: str
    # Individual points supporting the stance.
    arguments: List[str]
    # Supporting material cited for those points.
    evidence: List[str]
    # Optional numeric score attached to the contribution; None when the
    # agent did not score in this round.
    score: Optional[float] = None
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
@dataclass
class AgentScore:
    """An agent's scored assessment of a paper."""

    # Numeric rating; the prompts in this file ask for a 1-10 scale.
    score: float  # 1-10
    # Free-text justification for the rating.
    reasoning: str
    # Positive and negative observations; each instance gets its own fresh
    # list via default_factory, so instances never share state.
    strengths: List[str] = field(default_factory=list)
    weaknesses: List[str] = field(default_factory=list)
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
@dataclass
class ReviewResult:
    """Final outcome of a complete multi-agent review."""

    # Unique identifier generated for this review run.
    review_id: str
    # Verdict derived from the agents' final scores.
    recommendation: Recommendation
    # Agreement-based confidence in the verdict.
    confidence: float
    # Executive summary (strengths, weaknesses, key findings, ...).
    summary: Dict[str, Any]
    # Final score per agent.
    # NOTE(review): annotated with str keys, but the orchestrator in this
    # file stores AgentRole members as keys -- confirm which is intended.
    scores: Dict[str, AgentScore]
    # Debate contributions collected during the review.
    debate_transcript: List[DebateArgument]
    # Mean of the agents' final scores.
    overall_score: float
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
class SpecialistAgent:
    """Shared interface for every specialist reviewer in the debate.

    An instance carries its role, the model identifier to call and an
    Anthropic client. The four debate hooks below are abstract: each raises
    ``NotImplementedError`` until a subclass overrides it.
    """

    def __init__(self, role: AgentRole, model: str = "claude-3-5-sonnet-20241022"):
        """Store the role and model name and create the API client.

        Args:
            role: The perspective this agent represents.
            model: Model identifier passed to the Anthropic Messages API.
        """
        self.role, self.model = role, model
        self.client = anthropic.Anthropic()

    async def initial_review(self, paper_content: str, context: Dict[str, Any]) -> AgentScore:
        """Round 1 hook: score the paper from this agent's perspective."""
        raise NotImplementedError

    async def rebuttal(self, other_arguments: List[DebateArgument], paper_content: str) -> DebateArgument:
        """Round 2 hook: respond to the arguments raised by the agents."""
        raise NotImplementedError

    async def provide_evidence(self, disputed_points: List[str], paper_content: str) -> DebateArgument:
        """Round 3 hook: back up this agent's stance on the disputed points."""
        raise NotImplementedError

    async def final_position(self, all_arguments: List[DebateArgument]) -> AgentScore:
        """Round 4 hook: give the final score once the debate is over."""
        raise NotImplementedError
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
class MethodologyCritic(SpecialistAgent):
    """Specialist that judges the rigor of a paper's research methodology."""

    def __init__(self):
        super().__init__(AgentRole.METHODOLOGY_CRITIC)

    async def initial_review(self, paper_content: str, context: Dict[str, Any]) -> AgentScore:
        """Critique methodology rigor.

        Args:
            paper_content: Full paper text; only the first 4000 characters
                are placed in the prompt.
            context: Paper metadata (not read by this agent).

        Returns:
            The agent's initial score. Response parsing is simplified in this
            example, so a fixed placeholder score is returned.
        """
        prompt = f"""
        You are a methodology critic reviewing a research paper. Evaluate the research methodology
        for rigor, validity, and scientific soundness.

        Paper content:
        {paper_content[:4000]}

        Evaluate:
        1. Experimental design quality
        2. Statistical rigor and appropriate tests
        3. Sample size adequacy
        4. Control variables and confounds
        5. Validity threats

        Provide:
        - Score (1-10, where 10 is perfect methodology)
        - Reasoning for the score
        - List of strengths
        - List of weaknesses

        Return JSON format.
        """

        # The SDK call is synchronous, so run it in a worker thread to keep
        # the event loop free for the other agents.
        response = await asyncio.to_thread(
            self.client.messages.create,
            model=self.model,
            max_tokens=2000,
            messages=[{"role": "user", "content": prompt}]
        )

        # Parse response (simplified): the reply is not read yet and a fixed
        # placeholder assessment is returned instead.
        return AgentScore(
            score=7.5,
            reasoning="Methodology is generally sound but lacks power analysis",
            strengths=["Rigorous experimental design", "Appropriate statistical tests"],
            weaknesses=["Small sample size", "Missing power analysis"]
        )

    async def rebuttal(self, other_arguments: List[DebateArgument], paper_content: str) -> DebateArgument:
        """Respond to other agents' arguments.

        Args:
            other_arguments: Round-1 arguments. The orchestrator passes the
                arguments of *all* agents, so this agent's own entry is
                filtered out before building the prompt.
            paper_content: Full paper text; the first 4000 characters are
                included so the model can actually cite the paper.

        Returns:
            A round-2 argument. Response parsing is simplified in this
            example, so a fixed placeholder argument is returned.
        """
        # Only positions held by the other agents belong in the prompt.
        other_positions = "\n\n".join(
            f"{arg.agent.value}: {arg.position}\nArguments: {', '.join(arg.arguments)}"
            for arg in other_arguments
            if arg.agent != self.role
        )

        prompt = f"""
        As a methodology critic, respond to these other review positions:

        {other_positions}

        Paper content:
        {paper_content[:4000]}

        Focus on methodological concerns they may have missed or methodological strengths
        they undervalued.

        Provide:
        - Your position
        - Specific arguments
        - Evidence from the paper
        """

        response = await asyncio.to_thread(
            self.client.messages.create,
            model=self.model,
            max_tokens=1500,
            messages=[{"role": "user", "content": prompt}]
        )

        # Simplified: the reply is not parsed; a placeholder is returned.
        return DebateArgument(
            agent=self.role,
            round_number=2,
            position="Methodology concerns are critical",
            arguments=["Power analysis missing", "Sample size too small for claims"],
            evidence=["Section 3.1 describes N=50 participants"]
        )
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
class NoveltyAssessor(SpecialistAgent):
    """Specialist that assesses how novel and original the research is."""

    def __init__(self):
        super().__init__(AgentRole.NOVELTY_ASSESSOR)

    async def initial_review(self, paper_content: str, context: Dict[str, Any]) -> AgentScore:
        """Assess novelty and originality.

        Args:
            paper_content: Full paper text; only the first 4000 characters
                are placed in the prompt.
            context: Paper metadata; ``context['title']`` is used as the
                arXiv search query for related work.

        Returns:
            The agent's initial score. Response parsing is simplified in this
            example, so a fixed placeholder score is returned.
        """
        # Search for related work
        related_papers = await self._search_related_work(context.get('title', ''))

        prompt = f"""
        You are a novelty assessor. Evaluate the originality and novelty of this research.

        Paper content:
        {paper_content[:4000]}

        Related work found:
        {self._format_related_papers(related_papers)}

        Evaluate:
        1. Degree of novelty (breakthrough, significant, incremental, derivative)
        2. Unique contributions
        3. Overlap with existing work
        4. Advancement over state-of-the-art

        Provide:
        - Score (1-10, where 10 is groundbreaking novelty)
        - Reasoning
        - Strengths
        - Weaknesses
        """

        # The SDK call is synchronous, so run it in a worker thread.
        response = await asyncio.to_thread(
            self.client.messages.create,
            model=self.model,
            max_tokens=2000,
            messages=[{"role": "user", "content": prompt}]
        )

        # Simplified: the reply is not parsed; a placeholder is returned.
        return AgentScore(
            score=8.0,
            reasoning="Significant advancement with novel approach",
            strengths=["New algorithmic contribution", "Outperforms baselines"],
            weaknesses=["Limited to specific domain", "Builds heavily on prior work"]
        )

    async def _search_related_work(self, title: str) -> List[Dict[str, Any]]:
        """Search arXiv for related papers.

        The arXiv client performs blocking network I/O, so the query runs in
        a worker thread; otherwise it would stall the event loop while the
        other agents are being awaited concurrently.

        Args:
            title: Paper title used as the search query.

        Returns:
            Up to 10 dicts with ``title``, ``authors``, ``summary`` (first
            200 characters) and ``year``. Returns an empty list when the
            title is empty or the search fails (best effort).
        """
        # Nothing sensible to search for without a title.
        if not title or not title.strip():
            return []

        try:
            return await asyncio.to_thread(self._fetch_arxiv_results, title)
        except Exception as e:
            # Related work is optional context: log and carry on without it.
            print(f"Error searching arXiv: {e}")
            return []

    @staticmethod
    def _fetch_arxiv_results(title: str) -> List[Dict[str, Any]]:
        """Blocking helper: run the arXiv query and flatten the results."""
        search = arxiv.Search(
            query=title,
            max_results=10,
            sort_by=arxiv.SortCriterion.Relevance
        )
        # Client.results() is the supported way to execute a Search;
        # Search.results() is deprecated in current arxiv releases.
        return [
            {
                'title': result.title,
                'authors': [a.name for a in result.authors],
                'summary': result.summary[:200],
                'year': result.published.year
            }
            for result in arxiv.Client().results(search)
        ]

    def _format_related_papers(self, papers: List[Dict[str, Any]]) -> str:
        """Format related papers for prompt (at most the first five)."""
        return "\n\n".join(
            f"- {p['title']} ({p['year']})\n  {p['summary']}"
            for p in papers[:5]
        )
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
class ReproducibilityChecker(SpecialistAgent):
    """Specialist that checks how reproducible the research is."""

    def __init__(self):
        super().__init__(AgentRole.REPRODUCIBILITY_CHECKER)

    async def initial_review(self, paper_content: str, context: Dict[str, Any]) -> AgentScore:
        """Check reproducibility.

        Args:
            paper_content: Full paper text; only the first 4000 characters
                are placed in the prompt.
            context: Paper metadata. The code location is read from
                ``code_repository`` (the name used by the agent manifest's
                input schema) and falls back to the shorter ``code_repo``
                key; ``data_availability`` is read as-is.

        Returns:
            The agent's initial score. Response parsing is simplified in this
            example, so a fixed placeholder score is returned.
        """
        # Accept both spellings so callers following the manifest schema and
        # callers using the short key get the same prompt.
        code_repo = context.get('code_repository') or context.get('code_repo', 'Not provided')
        data_availability = context.get('data_availability', 'Not specified')

        prompt = f"""
        You are a reproducibility checker. Evaluate how reproducible this research is.

        Paper content:
        {paper_content[:4000]}

        Code repository: {code_repo}
        Data availability: {data_availability}

        Evaluate:
        1. Code availability and quality
        2. Data availability and accessibility
        3. Experimental details sufficiency
        4. Dependency specifications
        5. Environment documentation

        Provide:
        - Score (1-10, where 10 is fully reproducible)
        - Reasoning
        - Specific reproducibility barriers
        - Suggestions for improvement
        """

        # The SDK call is synchronous, so run it in a worker thread.
        response = await asyncio.to_thread(
            self.client.messages.create,
            model=self.model,
            max_tokens=2000,
            messages=[{"role": "user", "content": prompt}]
        )

        # Simplified: the reply is not parsed; a placeholder is returned.
        return AgentScore(
            score=6.0,
            reasoning="Code available but insufficient documentation",
            strengths=["Code repository provided", "Clear experimental protocol"],
            weaknesses=["Missing dependencies", "Data not publicly available", "Environment setup unclear"]
        )
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
class DebateOrchestrator:
    """Orchestrates multi-agent debate for paper review.

    The debate has up to four rounds: initial positions, rebuttals, evidence
    and consensus. Round 1 is mandatory for every agent. Rounds 2-4 are
    optional per agent: an agent whose hook raises ``NotImplementedError``
    falls back to its earlier position instead of aborting the review.
    """

    def __init__(self):
        self.agents: List[SpecialistAgent] = [
            MethodologyCritic(),
            NoveltyAssessor(),
            ReproducibilityChecker(),
        ]
        self.client = anthropic.Anthropic()

    async def orchestrate_review(
        self,
        paper_id: str,
        paper_content: str,
        metadata: Dict[str, Any],
        config: Optional[Dict[str, Any]] = None
    ) -> ReviewResult:
        """Orchestrate complete multi-agent review.

        Args:
            paper_id: Identifier of the paper (accepted for the caller's
                bookkeeping; not used by the orchestration itself).
            paper_content: Full text of the paper.
            metadata: Paper metadata handed to every agent as context.
            config: Optional settings. ``config['rounds']`` (default 4) is
                the number of debate rounds to run; rounds beyond the
                configured count are skipped.

        Returns:
            The assembled ``ReviewResult``.

        Raises:
            ValueError: If no agent produced a score.
        """
        config = config or {'rounds': 4}
        rounds = int(config.get('rounds', 4))
        review_id = self._generate_review_id()

        print(f"Starting review {review_id} for paper: {metadata.get('title', 'Unknown')}")

        # Round 1: Initial positions
        print("\n=== Round 1: Initial Positions ===")
        initial_scores = await self._round_1_initial_positions(paper_content, metadata)

        # Defaults used when later rounds are disabled by the configuration.
        rebuttals: List[DebateArgument] = []
        evidence: List[DebateArgument] = []
        final_scores = initial_scores

        # Round 2: Rebuttals
        if rounds >= 2:
            print("\n=== Round 2: Rebuttals ===")
            rebuttals = await self._round_2_rebuttals(paper_content, initial_scores)

        # Round 3: Evidence gathering
        if rounds >= 3:
            print("\n=== Round 3: Evidence ===")
            evidence = await self._round_3_evidence(paper_content, rebuttals)

        # Round 4: Consensus building
        if rounds >= 4:
            print("\n=== Round 4: Consensus ===")
            final_scores = await self._round_4_consensus(initial_scores, rebuttals, evidence)

        # Synthesize recommendation (raises ValueError when there are no scores)
        recommendation, confidence = self._synthesize_recommendation(final_scores)

        # Generate summary
        summary = await self._generate_summary(paper_content, final_scores, rebuttals)

        # Calculate overall score
        overall_score = sum(s.score for s in final_scores.values()) / len(final_scores)

        # NOTE(review): ReviewResult.scores is annotated Dict[str, AgentScore]
        # but the keys stored here are AgentRole members -- confirm intent.
        result = ReviewResult(
            review_id=review_id,
            recommendation=recommendation,
            confidence=confidence,
            summary=summary,
            scores=final_scores,
            debate_transcript=rebuttals + evidence,
            overall_score=overall_score
        )

        print(f"\n=== Review Complete ===")
        print(f"Recommendation: {recommendation.value}")
        print(f"Confidence: {confidence:.2f}")
        print(f"Overall Score: {overall_score:.1f}/10")

        return result

    @staticmethod
    async def _gather_optional(calls) -> List[Any]:
        """Await all calls concurrently, tolerating unimplemented hooks.

        Returns one entry per call, in order: the call's result, or ``None``
        when it raised ``NotImplementedError``. Any other exception is
        re-raised once all calls have finished.
        """
        outcomes = await asyncio.gather(*calls, return_exceptions=True)
        results: List[Any] = []
        for outcome in outcomes:
            if isinstance(outcome, NotImplementedError):
                results.append(None)
            elif isinstance(outcome, BaseException):
                raise outcome
            else:
                results.append(outcome)
        return results

    async def _round_1_initial_positions(
        self,
        paper_content: str,
        metadata: Dict[str, Any]
    ) -> Dict[AgentRole, AgentScore]:
        """Round 1: Each agent provides initial position."""
        scores = await asyncio.gather(*(
            agent.initial_review(paper_content, metadata)
            for agent in self.agents
        ))

        results = {}
        for agent, score in zip(self.agents, scores):
            results[agent.role] = score
            print(f"{agent.role.value}: {score.score}/10 - {score.reasoning}")

        return results

    async def _round_2_rebuttals(
        self,
        paper_content: str,
        initial_scores: Dict[AgentRole, AgentScore]
    ) -> List[DebateArgument]:
        """Round 2: Agents provide rebuttals.

        An agent without a ``rebuttal`` implementation restates its round-1
        position as its round-2 contribution.
        """
        # Convert scores to arguments
        initial_arguments = [
            DebateArgument(
                agent=role,
                round_number=1,
                position=score.reasoning,
                arguments=score.weaknesses,
                evidence=score.strengths,
                score=score.score
            )
            for role, score in initial_scores.items()
        ]

        outcomes = await self._gather_optional([
            agent.rebuttal(initial_arguments, paper_content)
            for agent in self.agents
        ])

        rebuttals: List[DebateArgument] = []
        for agent, rebuttal in zip(self.agents, outcomes):
            if rebuttal is None:
                # Fallback: carry the initial position forward into round 2.
                initial = initial_scores.get(agent.role)
                if initial is None:
                    continue
                rebuttal = DebateArgument(
                    agent=agent.role,
                    round_number=2,
                    position=initial.reasoning,
                    arguments=list(initial.weaknesses),
                    evidence=list(initial.strengths),
                    score=initial.score
                )
            rebuttals.append(rebuttal)
            print(f"{rebuttal.agent.value}: {rebuttal.position}")

        return rebuttals

    async def _round_3_evidence(
        self,
        paper_content: str,
        rebuttals: List[DebateArgument]
    ) -> List[DebateArgument]:
        """Round 3: Agents provide evidence for disputed points.

        Agents without a ``provide_evidence`` implementation contribute
        nothing in this round.
        """
        # Identify disputed points
        disputed = self._identify_disputes(rebuttals)

        outcomes = await self._gather_optional([
            agent.provide_evidence(disputed, paper_content)
            for agent in self.agents
        ])

        return [item for item in outcomes if item is not None]

    async def _round_4_consensus(
        self,
        initial_scores: Dict[AgentRole, AgentScore],
        rebuttals: List[DebateArgument],
        evidence: List[DebateArgument]
    ) -> Dict[AgentRole, AgentScore]:
        """Round 4: Agents provide final positions.

        An agent without a ``final_position`` implementation keeps its
        round-1 score as its final score.
        """
        all_arguments = rebuttals + evidence

        outcomes = await self._gather_optional([
            agent.final_position(all_arguments)
            for agent in self.agents
        ])

        results = {}
        for agent, score in zip(self.agents, outcomes):
            if score is None:
                score = initial_scores.get(agent.role)
                if score is None:
                    continue
            results[agent.role] = score
            print(f"{agent.role.value}: Final score {score.score}/10")

        return results

    def _synthesize_recommendation(
        self,
        scores: Dict[AgentRole, AgentScore]
    ) -> tuple[Recommendation, float]:
        """Synthesize final recommendation from agent scores.

        Returns:
            ``(recommendation, confidence)``. The recommendation is chosen
            from the mean score; confidence is ``1 - variance / 10`` clamped
            at 0, so closer agreement between agents gives higher confidence.

        Raises:
            ValueError: If ``scores`` is empty.
        """
        if not scores:
            raise ValueError("Cannot synthesize a recommendation without agent scores")

        values = [s.score for s in scores.values()]
        avg_score = sum(values) / len(values)

        # Calculate confidence based on agreement
        score_variance = sum((v - avg_score) ** 2 for v in values) / len(values)
        confidence = max(0.0, 1.0 - (score_variance / 10))

        # Map score to recommendation
        if avg_score >= 8.5:
            rec = Recommendation.STRONG_ACCEPT
        elif avg_score >= 7.5:
            rec = Recommendation.ACCEPT
        elif avg_score >= 6.5:
            rec = Recommendation.WEAK_ACCEPT
        elif avg_score >= 5.5:
            rec = Recommendation.BORDERLINE
        elif avg_score >= 4.5:
            rec = Recommendation.WEAK_REJECT
        elif avg_score >= 3.5:
            rec = Recommendation.REJECT
        else:
            rec = Recommendation.STRONG_REJECT

        return rec, confidence

    async def _generate_summary(
        self,
        paper_content: str,
        scores: Dict[AgentRole, AgentScore],
        debate: List[DebateArgument]
    ) -> Dict[str, Any]:
        """Generate executive summary of review.

        Returns:
            A dict with ``strengths`` and ``weaknesses`` (deduplicated, in
            first-seen order), ``key_findings`` (one line per agent) and
            ``controversial_points`` (disputes found in ``debate``).
        """
        all_strengths = []
        all_weaknesses = []

        for score in scores.values():
            all_strengths.extend(score.strengths)
            all_weaknesses.extend(score.weaknesses)

        # Deduplicate while keeping a stable, first-seen order so repeated
        # runs produce identical summaries.
        strengths = list(dict.fromkeys(all_strengths))
        weaknesses = list(dict.fromkeys(all_weaknesses))

        return {
            'strengths': strengths,
            'weaknesses': weaknesses,
            'key_findings': [
                f"{role.value}: {score.reasoning}"
                for role, score in scores.items()
            ],
            'controversial_points': self._identify_disputes(debate)
        }

    def _identify_disputes(self, arguments: List[DebateArgument]) -> List[str]:
        """Identify disputed points in debate."""
        # Simplified: return empty list
        return []

    def _generate_review_id(self) -> str:
        """Generate unique review ID."""
        import uuid
        return str(uuid.uuid4())
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
# Example usage
|
|
1024
|
-
async def main():
    """Run one end-to-end review on a mock paper and print the outcome."""
    # Mock paper content (in practice, extract from PDF)
    paper_content = """
Title: Novel Approach to Multi-Agent Reinforcement Learning

Abstract: We present a novel approach to multi-agent reinforcement learning...

[Full paper content would go here]
"""

    metadata = dict(
        title='Novel Approach to Multi-Agent Reinforcement Learning',
        authors=['Smith, J.', 'Doe, A.'],
        year=2024,
        field='cs',
        code_repo='https://github.com/example/marl',
        data_availability='Upon request',
    )

    review = await DebateOrchestrator().orchestrate_review(
        paper_id='arxiv:2024.12345',
        paper_content=paper_content,
        metadata=metadata,
    )

    # Headline figures first, then the two bullet lists from the summary.
    print("\n=== Final Review ===")
    print("Recommendation: " + review.recommendation.value)
    print("Confidence: {:.2f}".format(review.confidence))
    print("Overall Score: {:.1f}/10".format(review.overall_score))
    for heading, key in (("Strengths", 'strengths'), ("Weaknesses", 'weaknesses')):
        print("\n" + heading + ":")
        for item in review.summary[key]:
            print(f" - {item}")
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
if __name__ == "__main__":
|
|
1065
|
-
asyncio.run(main())
|
|
1066
|
-
```
|
|
1067
|
-
|
|
1068
|
-
## Deployment Instructions
|
|
1069
|
-
|
|
1070
|
-
### Step 1: Deploy Agent System
|
|
1071
|
-
|
|
1072
|
-
```bash
|
|
1073
|
-
# Deploy orchestrator
# The manifest is fed to kubectl on stdin; the nesting below is significant
# YAML structure (a flat manifest would be rejected).
kubectl apply -f - <<EOF
apiVersion: apps/v1
kind: Deployment
metadata:
  name: research-orchestrator
  namespace: research
spec:
  replicas: 2
  selector:
    matchLabels:
      app: research-orchestrator
  template:
    metadata:
      labels:
        app: research-orchestrator
    spec:
      containers:
        - name: orchestrator
          image: registry.example.com/agents/research-orchestrator:1.0.0
          env:
            - name: ANTHROPIC_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ai-keys
                  key: anthropic
EOF

# Deploy specialist agents
# The heredoc delimiter is unquoted on purpose so that $agent is expanded
# into the manifest on each iteration.
for agent in methodology-critic novelty-assessor reproducibility-checker; do
kubectl apply -f - <<EOF
apiVersion: apps/v1
kind: Deployment
metadata:
  name: $agent
  namespace: research
spec:
  replicas: 3
  selector:
    matchLabels:
      app: $agent
  template:
    metadata:
      labels:
        app: $agent
    spec:
      containers:
        - name: agent
          image: registry.example.com/agents/$agent:1.0.0
          env:
            - name: ANTHROPIC_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ai-keys
                  key: anthropic
EOF
done
|
|
1130
|
-
```
|
|
1131
|
-
|
|
1132
|
-
### Step 2: Configure Agent Registry
|
|
1133
|
-
|
|
1134
|
-
```bash
|
|
1135
|
-
# Deploy OSSA agent registry for agent discovery
|
|
1136
|
-
kubectl apply -f agent-registry.yaml
|
|
1137
|
-
```
|
|
1138
|
-
|
|
1139
|
-
## Production Checklist
|
|
1140
|
-
|
|
1141
|
-
- [ ] All specialist agents deployed and registered
|
|
1142
|
-
- [ ] Debate rounds configured appropriately
|
|
1143
|
-
- [ ] Consensus thresholds validated
|
|
1144
|
-
- [ ] ArXiv/PubMed API access configured
|
|
1145
|
-
- [ ] Code repository scanning enabled
|
|
1146
|
-
- [ ] Metrics and monitoring deployed
|
|
1147
|
-
- [ ] Cost limits configured
|
|
1148
|
-
- [ ] Debate transcripts logged for analysis
|
|
1149
|
-
- [ ] Human review integration tested
|
|
1150
|
-
|
|
1151
|
-
## Cost Management
|
|
1152
|
-
|
|
1153
|
-
- **Per-paper cost**: ~$5-10 (baseline: 4 agents × 4 rounds × ~$0.30 per agent per round ≈ $4.80)
|
|
1154
|
-
- **Optimization**: Cache literature searches, reuse embeddings, batch processing
|
|
1155
|
-
- **Budget alerts**: Notify when approaching limits
|
|
1156
|
-
|
|
1157
|
-
## Further Reading
|
|
1158
|
-
|
|
1159
|
-
- [Multi-Agent Reinforcement Learning](https://arxiv.org/)
|
|
1160
|
-
- [Peer Review Best Practices](https://www.acm.org/publications/policies/peer-review)
|
|
1161
|
-
- [OSSA Multi-Agent Patterns](https://openstandardagents.org/docs/multi-agent)
|