monoco-toolkit 0.3.9__py3-none-any.whl → 0.3.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. monoco/__main__.py +8 -0
  2. monoco/core/artifacts/__init__.py +16 -0
  3. monoco/core/artifacts/manager.py +575 -0
  4. monoco/core/artifacts/models.py +161 -0
  5. monoco/core/config.py +38 -4
  6. monoco/core/git.py +23 -0
  7. monoco/core/hooks/builtin/git_cleanup.py +1 -1
  8. monoco/core/ingestion/__init__.py +20 -0
  9. monoco/core/ingestion/discovery.py +248 -0
  10. monoco/core/ingestion/watcher.py +343 -0
  11. monoco/core/ingestion/worker.py +436 -0
  12. monoco/core/injection.py +63 -29
  13. monoco/core/integrations.py +2 -2
  14. monoco/core/loader.py +633 -0
  15. monoco/core/output.py +5 -5
  16. monoco/core/registry.py +34 -19
  17. monoco/core/resource/__init__.py +5 -0
  18. monoco/core/resource/finder.py +98 -0
  19. monoco/core/resource/manager.py +91 -0
  20. monoco/core/resource/models.py +35 -0
  21. monoco/core/skill_framework.py +292 -0
  22. monoco/core/skills.py +524 -385
  23. monoco/core/sync.py +73 -1
  24. monoco/core/workflow_converter.py +420 -0
  25. monoco/daemon/app.py +77 -1
  26. monoco/daemon/commands.py +10 -0
  27. monoco/daemon/mailroom_service.py +196 -0
  28. monoco/daemon/models.py +1 -0
  29. monoco/daemon/scheduler.py +236 -0
  30. monoco/daemon/services.py +185 -0
  31. monoco/daemon/triggers.py +55 -0
  32. monoco/features/agent/__init__.py +2 -2
  33. monoco/features/agent/adapter.py +41 -0
  34. monoco/features/agent/apoptosis.py +44 -0
  35. monoco/features/agent/cli.py +101 -144
  36. monoco/features/agent/config.py +35 -21
  37. monoco/features/agent/defaults.py +6 -49
  38. monoco/features/agent/engines.py +32 -6
  39. monoco/features/agent/manager.py +47 -6
  40. monoco/features/agent/models.py +2 -2
  41. monoco/features/agent/resources/atoms/atom-code-dev.yaml +61 -0
  42. monoco/features/agent/resources/atoms/atom-issue-lifecycle.yaml +73 -0
  43. monoco/features/agent/resources/atoms/atom-knowledge.yaml +55 -0
  44. monoco/features/agent/resources/atoms/atom-review.yaml +60 -0
  45. monoco/{core/resources/en → features/agent/resources/en/skills/monoco_atom_core}/SKILL.md +3 -1
  46. monoco/features/agent/resources/en/skills/monoco_workflow_agent_engineer/SKILL.md +94 -0
  47. monoco/features/agent/resources/en/skills/monoco_workflow_agent_manager/SKILL.md +93 -0
  48. monoco/features/agent/resources/en/skills/monoco_workflow_agent_planner/SKILL.md +85 -0
  49. monoco/features/agent/resources/en/skills/monoco_workflow_agent_reviewer/SKILL.md +114 -0
  50. monoco/features/agent/resources/workflows/workflow-dev.yaml +83 -0
  51. monoco/features/agent/resources/workflows/workflow-issue-create.yaml +72 -0
  52. monoco/features/agent/resources/workflows/workflow-review.yaml +94 -0
  53. monoco/features/agent/resources/zh/roles/monoco_role_engineer.yaml +49 -0
  54. monoco/features/agent/resources/zh/roles/monoco_role_manager.yaml +46 -0
  55. monoco/features/agent/resources/zh/roles/monoco_role_planner.yaml +46 -0
  56. monoco/features/agent/resources/zh/roles/monoco_role_reviewer.yaml +47 -0
  57. monoco/{core/resources/zh → features/agent/resources/zh/skills/monoco_atom_core}/SKILL.md +3 -1
  58. monoco/features/agent/resources/{skills/flow_engineer → zh/skills/monoco_workflow_agent_engineer}/SKILL.md +2 -2
  59. monoco/features/agent/resources/{skills/flow_manager → zh/skills/monoco_workflow_agent_manager}/SKILL.md +2 -2
  60. monoco/features/agent/resources/zh/skills/monoco_workflow_agent_planner/SKILL.md +259 -0
  61. monoco/features/agent/resources/zh/skills/monoco_workflow_agent_reviewer/SKILL.md +137 -0
  62. monoco/features/agent/session.py +59 -11
  63. monoco/features/agent/worker.py +38 -2
  64. monoco/features/artifact/__init__.py +0 -0
  65. monoco/features/artifact/adapter.py +33 -0
  66. monoco/features/artifact/resources/zh/AGENTS.md +14 -0
  67. monoco/features/artifact/resources/zh/skills/monoco_atom_artifact/SKILL.md +278 -0
  68. monoco/features/glossary/__init__.py +0 -0
  69. monoco/features/glossary/adapter.py +42 -0
  70. monoco/features/glossary/config.py +5 -0
  71. monoco/features/glossary/resources/en/AGENTS.md +29 -0
  72. monoco/features/glossary/resources/en/skills/monoco_atom_glossary/SKILL.md +35 -0
  73. monoco/features/glossary/resources/zh/AGENTS.md +29 -0
  74. monoco/features/glossary/resources/zh/skills/monoco_atom_glossary/SKILL.md +35 -0
  75. monoco/features/hooks/__init__.py +11 -0
  76. monoco/features/hooks/adapter.py +67 -0
  77. monoco/features/hooks/commands.py +309 -0
  78. monoco/features/hooks/core.py +441 -0
  79. monoco/features/hooks/resources/ADDING_HOOKS.md +234 -0
  80. monoco/features/i18n/adapter.py +18 -5
  81. monoco/features/i18n/core.py +482 -17
  82. monoco/features/i18n/resources/en/{SKILL.md → skills/monoco_atom_i18n/SKILL.md} +3 -1
  83. monoco/features/i18n/resources/en/skills/monoco_workflow_i18n_scan/SKILL.md +105 -0
  84. monoco/features/i18n/resources/zh/{SKILL.md → skills/monoco_atom_i18n/SKILL.md} +3 -1
  85. monoco/features/i18n/resources/{skills/i18n_scan_workflow → zh/skills/monoco_workflow_i18n_scan}/SKILL.md +2 -2
  86. monoco/features/issue/adapter.py +19 -6
  87. monoco/features/issue/commands.py +281 -7
  88. monoco/features/issue/core.py +272 -19
  89. monoco/features/issue/engine/machine.py +118 -5
  90. monoco/features/issue/linter.py +60 -5
  91. monoco/features/issue/models.py +3 -2
  92. monoco/features/issue/resources/en/AGENTS.md +109 -0
  93. monoco/features/issue/resources/en/{SKILL.md → skills/monoco_atom_issue/SKILL.md} +3 -1
  94. monoco/features/issue/resources/en/skills/monoco_workflow_issue_creation/SKILL.md +167 -0
  95. monoco/features/issue/resources/en/skills/monoco_workflow_issue_development/SKILL.md +224 -0
  96. monoco/features/issue/resources/en/skills/monoco_workflow_issue_management/SKILL.md +159 -0
  97. monoco/features/issue/resources/en/skills/monoco_workflow_issue_refinement/SKILL.md +203 -0
  98. monoco/features/issue/resources/hooks/post-checkout.sh +39 -0
  99. monoco/features/issue/resources/hooks/pre-commit.sh +41 -0
  100. monoco/features/issue/resources/hooks/pre-push.sh +35 -0
  101. monoco/features/issue/resources/zh/AGENTS.md +109 -0
  102. monoco/features/issue/resources/zh/{SKILL.md → skills/monoco_atom_issue_lifecycle/SKILL.md} +3 -1
  103. monoco/features/issue/resources/zh/skills/monoco_workflow_issue_creation/SKILL.md +167 -0
  104. monoco/features/issue/resources/zh/skills/monoco_workflow_issue_development/SKILL.md +224 -0
  105. monoco/features/issue/resources/{skills/issue_lifecycle_workflow → zh/skills/monoco_workflow_issue_management}/SKILL.md +2 -2
  106. monoco/features/issue/resources/zh/skills/monoco_workflow_issue_refinement/SKILL.md +203 -0
  107. monoco/features/issue/validator.py +101 -1
  108. monoco/features/memo/adapter.py +21 -8
  109. monoco/features/memo/cli.py +103 -10
  110. monoco/features/memo/core.py +178 -92
  111. monoco/features/memo/models.py +53 -0
  112. monoco/features/memo/resources/en/skills/monoco_atom_memo/SKILL.md +77 -0
  113. monoco/features/memo/resources/en/skills/monoco_workflow_note_processing/SKILL.md +140 -0
  114. monoco/features/memo/resources/zh/{SKILL.md → skills/monoco_atom_memo/SKILL.md} +3 -1
  115. monoco/features/memo/resources/{skills/note_processing_workflow → zh/skills/monoco_workflow_note_processing}/SKILL.md +2 -2
  116. monoco/features/spike/adapter.py +18 -5
  117. monoco/features/spike/resources/en/{SKILL.md → skills/monoco_atom_spike/SKILL.md} +3 -1
  118. monoco/features/spike/resources/en/skills/monoco_workflow_research/SKILL.md +121 -0
  119. monoco/features/spike/resources/zh/{SKILL.md → skills/monoco_atom_spike/SKILL.md} +3 -1
  120. monoco/features/spike/resources/{skills/research_workflow → zh/skills/monoco_workflow_research}/SKILL.md +2 -2
  121. monoco/main.py +38 -1
  122. monoco_toolkit-0.3.11.dist-info/METADATA +130 -0
  123. monoco_toolkit-0.3.11.dist-info/RECORD +181 -0
  124. monoco/features/agent/reliability.py +0 -106
  125. monoco/features/agent/resources/skills/flow_reviewer/SKILL.md +0 -114
  126. monoco_toolkit-0.3.9.dist-info/METADATA +0 -127
  127. monoco_toolkit-0.3.9.dist-info/RECORD +0 -115
  128. /monoco/{core → features/agent}/resources/en/AGENTS.md +0 -0
  129. /monoco/{core → features/agent}/resources/zh/AGENTS.md +0 -0
  130. {monoco_toolkit-0.3.9.dist-info → monoco_toolkit-0.3.11.dist-info}/WHEEL +0 -0
  131. {monoco_toolkit-0.3.9.dist-info → monoco_toolkit-0.3.11.dist-info}/entry_points.txt +0 -0
  132. {monoco_toolkit-0.3.9.dist-info → monoco_toolkit-0.3.11.dist-info}/licenses/LICENSE +0 -0
@@ -1,6 +1,8 @@
1
1
  import fnmatch
2
2
  from pathlib import Path
3
- from typing import List, Optional
3
+ from typing import List, Optional, Tuple
4
+ from enum import Enum
5
+ from dataclasses import dataclass
4
6
  import re
5
7
 
6
8
  DEFAULT_EXCLUDES = [
@@ -214,43 +216,506 @@ def check_translation_exists(
214
216
  return missing
215
217
 
216
218
 
219
# Common technical terms that should not count as "English words"
# when detecting language in Chinese documents.
# NOTE: this is a plain set literal; duplicated entries (e.g. "cli",
# "snyk", "pwa") collapse harmlessly at construction time.
TECHNICAL_TERMS_ALLOWLIST = {
    # CLI/Shell
    "cli", "api", "ui", "ux", "gui", "shell", "bash", "zsh", "sh",
    "cmd", "powershell", "terminal", "console", "prompt",
    # Cloud/Container
    "kubernetes", "k8s", "docker", "container", "pod", "cluster", "node",
    "namespace", "ingress", "service", "deployment", "helm", "kubectl",
    "aws", "gcp", "azure", "cloud", "serverless", "lambda", "ec2", "s3",
    # DevOps/CI/CD
    "ci", "cd", "cicd", "pipeline", "jenkins", "gitlab", "github", "git",
    "svn", "mercurial", "hg", "commit", "branch", "merge", "rebase", "tag",
    "hook", "action", "workflow", "artifact", "build", "deploy", "release",
    # Programming Languages
    "python", "javascript", "js", "typescript", "ts", "java", "kotlin",
    "scala", "groovy", "ruby", "go", "golang", "rust", "c", "cpp", "c++",
    "csharp", "c#", "php", "perl", "lua", "swift", "objc", "objective-c",
    "r", "matlab", "julia", "dart", "flutter", "elixir", "erlang", "haskell",
    "clojure", "lisp", "scheme", "racket", "fsharp", "f#", "vb", "vba",
    # Web/Frameworks
    "html", "css", "scss", "sass", "less", "xml", "json", "yaml", "yml",
    "toml", "ini", "csv", "tsv", "markdown", "md", "rst", "asciidoc",
    "react", "vue", "angular", "svelte", "nextjs", "nuxt", "django",
    "flask", "fastapi", "tornado", "express", "koa", "nestjs", "spring",
    "rails", "laravel", "symfony", "dotnet", "aspnet", "mvc", "mvvm",
    # Databases
    "sql", "nosql", "mysql", "postgresql", "postgres", "sqlite", "oracle",
    "mssql", "sqlserver", "mongodb", "mongo", "redis", "cassandra",
    "dynamodb", "firebase", "elasticsearch", "solr", "neo4j", "graphql",
    # Testing
    "test", "testing", "unittest", "pytest", "jest", "mocha", "jasmine",
    "cypress", "selenium", "cucumber", "bdd", "tdd", "mock", "stub",
    "fixture", "assertion", "coverage", "benchmark", "profiling",
    # Architecture/Patterns
    "microservice", "microservices", "monolith", "server", "client",
    "frontend", "backend", "fullstack", "api-gateway", "load-balancer",
    "proxy", "cache", "cdn", "dns", "http", "https", "tcp", "udp",
    "websocket", "grpc", "rest", "soap", "oauth", "jwt",
    "sso", "ldap", "auth", "authentication", "authorization",
    # OS/Platform
    "linux", "ubuntu", "debian", "centos", "rhel", "fedora", "arch",
    "alpine", "windows", "macos", "darwin", "ios", "android",
    "unix", "posix", "kernel", "systemd", "init", "daemon",
    # Tools/IDE
    "vscode", "idea", "pycharm", "webstorm", "vim", "neovim", "nvim",
    "emacs", "sublime", "atom", "eclipse", "netbeans", "xcode",
    "docker-compose", "dockerfile", "makefile", "cmake", "gradle",
    "maven", "npm", "yarn", "pnpm", "pip", "conda", "venv", "virtualenv",
    # AI/ML
    "ai", "ml", "dl", "llm", "nlp", "cv", "neural", "network",
    "tensorflow", "pytorch", "keras", "scikit", "sklearn", "pandas",
    "numpy", "scipy", "matplotlib", "seaborn", "jupyter", "notebook",
    "training", "inference", "model", "dataset", "vector", "embedding",
    # Security
    "security", "vulnerability", "exploit", "cve", "xss", "csrf",
    "injection", "encryption", "decryption", "hash", "signature",
    "certificate", "ssl", "tls", "firewall", "vpn",
    # Monitoring/Observability
    "log", "logging", "metrics", "tracing", "observability", "monitoring",
    "alert", "dashboard", "grafana", "prometheus", "elk", "splunk",
    "datadog", "newrelic", "sentry", "bugsnag", "rollbar",
    # Agile/Project Management
    "agile", "scrum", "kanban", "sprint", "backlog", "epic", "story",
    "task", "issue", "ticket", "bug", "feature", "milestone", "roadmap",
    "retro", "standup", "review", "demo", "po", "sm", "pm",
    # Misc Tech Terms
    "id", "uuid", "guid", "url", "uri", "ip", "ipv4", "ipv6",
    "mac", "hostname", "domain", "subdomain", "path", "query",
    "header", "body", "payload", "request", "response", "status",
    "error", "exception", "warning", "info", "debug", "trace",
    "config", "configuration", "setting", "option", "flag", "env",
    "variable", "constant", "literal", "expression", "statement",
    "function", "method", "class", "object", "instance", "interface",
    "abstract", "virtual", "override", "inherit", "extend", "implement",
    "import", "export", "module", "package", "library", "framework",
    "sdk", "toolkit", "runtime", "compiler", "interpreter", "vm",
    "version", "changelog", "license", "copyright",
    "repo", "repository", "fork", "clone", "pull", "push", "fetch",
    "upstream", "origin", "remote", "local", "stash", "stage",
    "index", "working", "tree", "head", "detached", "orphan",
    "squash", "amend", "cherry-pick", "revert", "reset", "clean",
    "linter", "formatter", "parser", "lexer", "ast", "ir",
    "bytecode", "opcode", "assembly", "binary", "executable",
    "static", "dynamic", "linking", "compilation", "transpilation",
    "minification", "bundling", "tree-shaking", "code-splitting",
    "hot-reload", "hot-restart", "live-reload", "watch", "watchman",
    "polyfill", "shim", "ponyfill", "fallback", "graceful",
    "async", "sync", "parallel", "concurrent", "sequential",
    "blocking", "non-blocking", "io", "nio", "epoll", "kqueue",
    "thread", "process", "coroutine", "fiber", "goroutine",
    "mutex", "lock", "semaphore", "channel", "queue", "stack",
    "heap", "gc", "garbage", "collection", "memory", "leak",
    "buffer", "stream", "pipe", "redirect", "tee", "cat",
    "grep", "awk", "sed", "cut", "sort", "uniq", "wc", "head", "tail",
    "find", "locate", "which", "whereis", "type", "alias",
    "source", "printenv", "set", "unset",
    "chmod", "chown", "chgrp", "umask", "sudo", "su",
    "ssh", "scp", "sftp", "rsync", "ftp", "telnet", "nc",
    "ping", "traceroute", "netstat", "ss", "lsof", "fuser",
    "ps", "top", "htop", "kill", "pkill", "killall", "nice",
    "cron", "at", "batch", "systemctl",
    "mount", "umount", "df", "du", "fsck", "mkfs", "fdisk",
    "parted", "lsblk", "blkid", "uuidgen", "tune2fs",
    "tar", "gzip", "gunzip", "zip", "unzip", "bz2", "xz",
    "7z", "rar", "archive", "compress", "decompress", "extract",
    "curl", "wget", "httpie", "postman", "insomnia",
    "nginx", "apache", "httpd", "tomcat", "jetty", "undertow",
    "haproxy", "traefik", "envoy", "istio", "linkerd",
    "rabbitmq", "kafka", "mqtt", "amqp", "stomp", "zeromq",
    "memcached", "etcd", "consul", "vault", "zookeeper",
    "jaeger", "zipkin", "opentelemetry",
    "ansible", "puppet", "chef", "saltstack", "terraform",
    "pulumi", "vagrant", "packer", "nomad", "consul-template",
    "github-actions", "gitlab-ci", "travis", "circleci",
    "teamcity", "bamboo", "drone", "argo", "tekton", "spinnaker",
    "sonarqube", "nexus", "artifactory", "harbor", "chartmuseum",
    "loki", "fluentd", "fluent-bit", "filebeat",
    "telegraf", "influxdb", "timescaledb", "promscale",
    "minio", "ceph", "glusterfs", "nfs", "smb", "cifs",
    "wireguard", "openvpn", "ipsec", "ssl-vpn",
    "waf", "ids", "ips", "siem", "soar", "xdr", "edr",
    "ad", "saml", "oauth2", "openid", "oidc",
    "mfa", "2fa", "totp", "hotp", "u2f", "webauthn", "fido",
    "aes", "rsa", "ecc", "dsa", "ecdsa", "ed25519", "curve25519",
    "sha", "md5", "bcrypt", "scrypt", "argon2", "pbkdf2",
    "hmac", "cmac", "gcm", "cbc", "ecb", "ctr", "ofb", "cfb",
    "x509", "csr", "crt", "pem", "der", "p12", "pfx",
    "acme", "letsencrypt", "certbot", "caddy",
    "wasm", "webassembly", "wasmer", "wasmtime", "wasi",
    "pwa", "spa", "mpa", "ssr", "ssg", "isr",
    "amp", "instant", "turbo", "stimulus", "htmx",
    "webcomponents", "shadow", "dom", "custom", "elements",
    "service-worker", "manifest", "offline",
    "webrtc", "sse", "eventsource", "polling",
    "subscription", "mutation", "schema",
    "resolver", "directive", "fragment", "union",
    "prisma", "sequelize", "typeorm", "sqlalchemy", "orm",
    "migration", "seed", "factory",
    "faker", "factory-boy", "hypothesis", "property-based",
    "snapshot", "visual", "regression", "e2e", "integration",
    "unit", "functional", "acceptance", "performance", "load",
    "stress", "chaos", "contract", "pact", "consumer", "provider",
    "atdd", "sbe", "example", "specification",
    "given", "when", "then", "scenario", "background",
    "behave", "specflow", "gauge", "relish",
    "allure", "reportportal", "xunit", "nunit", "mstest",
    "sonar", "coveralls", "codecov", "codeclimate", "codacy",
    "deepsource", "snyk", "whitesource", "blackduck", "fossa",
    "dependabot", "renovate", "greenkeeper",
    "pre-commit", "husky", "lint-staged", "commitlint",
    "semantic-release", "standard-version", "conventional",
    "commitizen", "cz",
    "monoco", "kimi", "claude", "gemini", "qwen", "gpt",
}


def detect_language(content: str) -> str:
    """
    Detect the language of the content using improved heuristics.

    This function is designed to handle technical documents with mixed
    Chinese and English content, especially for IT/Software development
    topics: fenced/inline code, URLs and issue IDs are stripped before
    counting, and English words from TECHNICAL_TERMS_ALLOWLIST do not
    count towards "English" classification.

    Returns: 'zh', 'en', or 'unknown'
    """
    if not content:
        return "unknown"

    # Strip YAML Frontmatter if present.
    frontmatter_pattern = re.compile(r"^---\n.*?\n---\n", re.DOTALL)
    content = frontmatter_pattern.sub("", content)

    if not content.strip():
        return "unknown"

    # Remove fenced code blocks (```...```) — they are full of English keywords.
    code_block_pattern = re.compile(r"```[\s\S]*?```", re.MULTILINE)
    content_no_code = code_block_pattern.sub("", content)

    # Remove inline code (`...`).
    inline_code_pattern = re.compile(r"`[^`]+`")
    content_no_code = inline_code_pattern.sub("", content_no_code)

    # Remove URLs.
    # FIX: dropped the trailing "|" that created an empty alternation —
    # it matched the empty string at every position and diverged from the
    # identical URL pattern used elsewhere in this module.
    url_pattern = re.compile(r"https?://\S+|www\.\S+")
    content_clean = url_pattern.sub("", content_no_code)

    # Remove issue IDs (EPIC-0001, FEAT-1234, etc.).
    issue_id_pattern = re.compile(r"\b(EPIC|FEAT|CHORE|FIX)-\d{4}\b")
    content_clean = issue_id_pattern.sub("", content_clean)

    if not content_clean.strip():
        # If after cleaning there's nothing left, it was likely all code/IDs.
        return "unknown"

    total_chars = len(content_clean)

    # Count CJK characters (Common CJK Unified Ideographs).
    cjk_count = sum(1 for c in content_clean if "\u4e00" <= c <= "\u9fff")

    # Count non-ASCII characters excluding CJK (accents, symbols, ...).
    non_ascii_non_cjk = sum(
        1 for c in content_clean
        if ord(c) > 127 and not ("\u4e00" <= c <= "\u9fff")
    )

    # Extract alphabetic words for English analysis.
    words = re.findall(r"\b[a-zA-Z][a-zA-Z0-9]*\b", content_clean)
    total_words = len(words)

    # Words that are known technical jargon (case-insensitive match).
    technical_term_count = sum(
        1 for word in words
        if word.lower() in TECHNICAL_TERMS_ALLOWLIST
    )
    non_technical_words = total_words - technical_term_count

    # Heuristic 1: > 3% CJK characters -> Chinese document.
    # (Threshold deliberately lower than the naive 5% to catch terse docs.)
    cjk_ratio = cjk_count / total_chars if total_chars > 0 else 0
    if cjk_ratio > 0.03:
        return "zh"

    # Heuristic 2: some CJK (>1%) mixed with technical English terms
    # -> technical Chinese document.
    if cjk_ratio > 0.01 and technical_term_count > 0:
        return "zh"

    # Heuristic 3: mostly-ASCII text with meaningful non-technical
    # English content (>= 10 plain words) -> English.
    non_ascii_ratio = non_ascii_non_cjk / total_chars if total_chars > 0 else 0
    if non_ascii_ratio < 0.15 and non_technical_words >= 10:
        return "en"

    # Heuristic 4: high word density with essentially no CJK -> English.
    if cjk_ratio < 0.01 and total_words > 20:
        return "en"

    return "unknown"
class BlockType(Enum):
    """Kinds of structural blocks recognized in a Markdown document."""

    HEADING = "heading"        # ATX heading line (# ...)
    PARAGRAPH = "paragraph"    # plain narrative text
    CODE_BLOCK = "code_block"  # fenced ``` ... ``` region
    LIST_ITEM = "list_item"    # bullet or ordered list item
    QUOTE = "quote"            # > blockquote line
    TABLE = "table"            # pipe-delimited table row(s)
    EMPTY = "empty"            # blank separator line


@dataclass
class ContentBlock:
    """A contiguous chunk of Markdown plus its language-detection state."""

    type: BlockType  # structural kind of this chunk
    content: str     # raw text of the chunk
    line_start: int  # first line index of the chunk
    line_end: int    # end line index (exclusive)
    detected_lang: str = "unknown"  # "zh" / "en" / "unknown"
    should_skip: bool = False       # True when exempt from language checks
def parse_markdown_blocks(content: str) -> List[ContentBlock]:
    """
    Parse markdown content into blocks for language detection.

    This function respects block boundaries like:
    - Code blocks (```...```)
    - Headings (# ...)
    - Paragraphs
    - List items

    Line indices in the returned blocks are 0-based and relative to the
    content after YAML frontmatter has been stripped.

    Returns a list of ContentBlock objects.
    """
    # Strip YAML Frontmatter if present.
    frontmatter_pattern = re.compile(r"^---\n.*?\n---\n", re.DOTALL)
    content_without_fm = frontmatter_pattern.sub("", content)

    lines = content_without_fm.splitlines()
    blocks: List[ContentBlock] = []
    current_block_lines: List[str] = []
    current_block_type = BlockType.PARAGRAPH
    current_start_line = 0
    in_code_block = False
    # FIX: removed the unused `code_block_lang` local (assigned, never read).

    def flush_block():
        # Emit the accumulated lines (if any) as one block, then reset the
        # accumulator. Reads current_block_type / current_start_line from
        # the enclosing scope at call time.
        # FIX: renamed the closure-local so it no longer shadows the
        # `content` parameter, and dropped the never-assigned nonlocal
        # declaration of current_start_line.
        nonlocal current_block_lines
        if current_block_lines:
            block_text = "\n".join(current_block_lines)
            blocks.append(
                ContentBlock(
                    type=current_block_type,
                    content=block_text,
                    line_start=current_start_line,
                    line_end=current_start_line + len(current_block_lines),
                )
            )
            current_block_lines = []

    for i, line in enumerate(lines):
        # Fenced code-block delimiters toggle code mode; the fence lines
        # themselves belong to the code block.
        if line.strip().startswith("```"):
            if not in_code_block:
                # Start of code block.
                flush_block()
                in_code_block = True
                current_block_type = BlockType.CODE_BLOCK
                current_start_line = i
                current_block_lines.append(line)
            else:
                # End of code block.
                current_block_lines.append(line)
                flush_block()
                in_code_block = False
                current_block_type = BlockType.PARAGRAPH
            continue

        if in_code_block:
            current_block_lines.append(line)
            continue

        # ATX heading: always a single-line block of its own.
        if re.match(r"^#{1,6}\s", line):
            flush_block()
            blocks.append(
                ContentBlock(
                    type=BlockType.HEADING,
                    content=line,
                    line_start=i,
                    line_end=i + 1,
                )
            )
            current_start_line = i + 1
            current_block_type = BlockType.PARAGRAPH
            continue

        # Blank line: terminates the current block, recorded as EMPTY.
        if not line.strip():
            flush_block()
            blocks.append(ContentBlock(
                type=BlockType.EMPTY,
                content="",
                line_start=i,
                line_end=i + 1,
            ))
            current_start_line = i + 1
            current_block_type = BlockType.PARAGRAPH
            continue

        # List item (bullet or ordered): each item starts a new block.
        if re.match(r"^\s*[-*+]\s", line) or re.match(r"^\s*\d+\.\s", line):
            flush_block()
            current_block_type = BlockType.LIST_ITEM
            current_start_line = i
            current_block_lines.append(line)
            continue

        # Blockquote line.
        if line.strip().startswith(">"):
            flush_block()
            current_block_type = BlockType.QUOTE
            current_start_line = i
            current_block_lines.append(line)
            continue

        # Table-row heuristic: any non-heading line containing a pipe.
        # NOTE(review): this also matches prose containing "|" — presumably
        # acceptable for the lint use case; confirm.
        if "|" in line and not line.strip().startswith("#"):
            if current_block_type != BlockType.TABLE:
                flush_block()
                current_block_type = BlockType.TABLE
                current_start_line = i
            current_block_lines.append(line)
            continue

        # Default: accumulate into the current (paragraph-like) block.
        if not current_block_lines:
            current_start_line = i
        current_block_lines.append(line)

    # Flush whatever is still accumulated at EOF.
    flush_block()

    return blocks
def should_skip_block_for_language_check(
    block: ContentBlock,
    all_blocks: List[ContentBlock],
    block_index: int,
    source_lang: str = "zh"
) -> bool:
    """
    Determine if a block should be skipped during language consistency checks.

    Design Principle:
    - Narrative text should be in the source language (e.g., Chinese)
    - English should only appear as isolated nouns (technical terms, filenames, code blocks)

    A block is skipped when it is a code block, is empty, or contains
    nothing but technical fragments (inline code, URLs, issue IDs,
    file names) once those are stripped away.

    Note: ``all_blocks``, ``block_index`` and ``source_lang`` are accepted
    for context-aware rules but are not consulted by the current heuristics.
    """
    # Code blocks always contain programming-language keywords; empty
    # blocks carry no language signal.
    if block.type in (BlockType.CODE_BLOCK, BlockType.EMPTY):
        return True

    text = block.content.strip()
    if not text:
        return True

    # Strip non-language elements in order; whatever survives is
    # narrative text that must be language-checked.
    for pattern in (
        r"`[^`]+`",                          # inline code
        r"https?://\S+|www\.\S+",            # URLs
        r"\b(EPIC|FEAT|CHORE|FIX)-\d{4}\b",  # issue IDs
        r"[\w\-]+\.[\w\-]+",                 # file names / dotted paths
    ):
        text = re.sub(pattern, "", text)

    # Skip when nothing but technical fragments remained.
    return not text.strip()
def detect_language_blocks(content: str, source_lang: str = "zh") -> List[ContentBlock]:
    """
    Detect language for each block in the content.

    This provides block-level language detection that respects:
    - Code blocks (skipped)
    - Technical terms (handled by detect_language)
    - Paragraph boundaries

    Design Principle:
    - Narrative text should be in the source language
    - English should only appear as isolated nouns

    Returns a list of ContentBlock objects with detected language;
    skipped blocks are marked with detected_lang == "unknown".
    """
    blocks = parse_markdown_blocks(content)

    for idx, blk in enumerate(blocks):
        # Mark exempt blocks first, then run detection only on the rest.
        blk.should_skip = should_skip_block_for_language_check(
            blk, blocks, idx, source_lang
        )
        blk.detected_lang = (
            "unknown" if blk.should_skip else detect_language(blk.content)
        )

    return blocks
def has_language_mismatch_blocks(content: str, source_lang: str = "zh") -> Tuple[bool, List[ContentBlock]]:
    """
    Check if content has language mismatches at block level.

    Returns:
        (has_mismatch, mismatched_blocks)
        - has_mismatch: True if any non-skipped block has mismatched language
        - mismatched_blocks: List of blocks that don't match source language
    """
    blocks = detect_language_blocks(content, source_lang)

    # The only "wrong" language for a zh/cn source is English, and vice
    # versa; any other source language yields no mismatches.
    lang = source_lang.lower()
    if lang in ("zh", "cn"):
        foreign = "en"
    elif lang == "en":
        foreign = "zh"
    else:
        foreign = None

    mismatched = [
        blk for blk in blocks
        if not blk.should_skip
        and blk.detected_lang != "unknown"
        and blk.detected_lang == foreign
    ]

    return len(mismatched) > 0, mismatched
254
719
  def is_content_source_language(path: Path, source_lang: str = "en") -> bool:
255
720
  """
256
721
  Check if file content appears to be in the source language.
@@ -1,6 +1,8 @@
1
1
  ---
2
- name: monoco-i18n
2
+ name: monoco_atom_i18n
3
3
  description: Internationalization quality control for documentation. Ensures multi-language documentation stays synchronized.
4
+ type: atom
5
+ version: 1.0.0
4
6
  ---
5
7
 
6
8
  # Documentation I18n
@@ -0,0 +1,105 @@
1
+ ---
2
+ name: monoco_workflow_i18n_scan
3
+ description: I18n Scan Workflow (Flow Skill). Defines the standard operational process from scanning missing translations to generating translation tasks, ensuring multilingual documentation quality.
4
+ type: workflow
5
+ domain: i18n
6
+ version: 1.0.0
7
+ ---
8
+
9
+ # I18n Scan Workflow
10
+
11
+ Standardized workflow for I18n scanning, ensuring the "Scan → Identify → Generate Tasks" process.
12
+
13
+ ## Workflow State Machine
14
+
15
+ ```mermaid
16
+ stateDiagram-v2
17
+ [*] --> Scan: Trigger scan
18
+
19
+ Scan --> Identify: Scan completed
20
+ Scan --> Scan: Configuration error<br/>(fix configuration)
21
+
22
+ Identify --> GenerateTasks: Missing found
23
+ Identify --> [*]: No missing<br/>(completed)
24
+
25
+ GenerateTasks --> [*]: Task generation completed
26
+ ```
27
+
28
+ ## Execution Steps
29
+
30
+ ### 1. Scan (Scanning)
31
+
32
+ - **Goal**: Scan all documents in the project, identify translation coverage
33
+ - **Input**: Project files, i18n configuration
34
+ - **Output**: Scan report
35
+ - **Checkpoints**:
36
+ - [ ] Check i18n configuration in `.monoco/config.yaml`
37
+ - [ ] Run `monoco i18n scan`
38
+ - [ ] Confirm source and target language settings are correct
39
+ - [ ] Verify exclusion rules (.gitignore, build directories, etc.)
40
+
41
+ ### 2. Identify (Identify Missing)
42
+
43
+ - **Goal**: Analyze scan results, identify specific missing translations
44
+ - **Strategy**: Compare source and target files
45
+ - **Checkpoints**:
46
+ - [ ] List all source files with missing translations
47
+ - [ ] Identify missing target languages
48
+ - [ ] Assess impact scope of missing translations
49
+ - [ ] Sort by priority (core documents first)
50
+
51
+ ### 3. Generate Tasks (Generate Tasks)
52
+
53
+ - **Goal**: Create tracking tasks for missing translations
54
+ - **Strategy**: Create Issue or memo based on missing status
55
+ - **Checkpoints**:
56
+ - [ ] Create Feature Issue for core document missing translations
57
+ - [ ] Create Memo reminder for secondary document missing translations
58
+ - [ ] Annotate file paths requiring translation in the Issue
59
+ - [ ] Set reasonable priority and deadline
60
+
61
+ ## Decision Branches
62
+
63
+ | Condition | Action |
64
+ |-----------|--------|
65
+ | Configuration error | Fix `.monoco/config.yaml`, rescan |
66
+ | No missing translations | Process completed, no further action needed |
67
+ | Large amount missing | Create Epic, split into multiple Features |
68
+ | Critical document missing | High priority, create Issue immediately |
69
+
70
+ ## Compliance Requirements
71
+
72
+ - **Required**: Verify i18n configuration is correct before scanning
73
+ - **Required**: All core documents must have corresponding translations
74
+ - **Recommended**: Run scans regularly (e.g., weekly)
75
+ - **Recommended**: Bind translation tasks with feature development
76
+
77
+ ## Related Commands
78
+
79
+ ```bash
80
+ # Scan for missing translations
81
+ monoco i18n scan
82
+
83
+ # Create translation task
84
+ monoco issue create feature -t "Translate {filepath} to {lang}"
85
+
86
+ # Add memo
87
+ monoco memo add "Needs translation: {filepath}"
88
+ ```
89
+
90
+ ## Output Example
91
+
92
+ After scanning completes, a report like the following should be generated:
93
+
94
+ ```
95
+ I18n Scan Report
96
+ ================
97
+ Source Language: en
98
+ Target Languages: zh, ja
99
+
100
+ Missing Translations:
101
+ - docs/guide.md → zh/guide.md [MISSING]
102
+ - docs/api.md → ja/api.md [MISSING]
103
+
104
+ Coverage: 85%
105
+ ```
@@ -1,6 +1,8 @@
1
1
  ---
2
- name: monoco-i18n
2
+ name: monoco_atom_i18n
3
3
  description: 文档国际化质量控制。确保多语言文档保持同步。
4
+ type: atom
5
+ version: 1.0.0
4
6
  ---
5
7
 
6
8
  # 文档国际化
@@ -1,7 +1,7 @@
1
1
  ---
2
- name: i18n-scan-workflow
2
+ name: monoco_workflow_i18n_scan
3
3
  description: I18n 扫描工作流 (Flow Skill)。定义从扫描缺失翻译到生成翻译任务的标准操作流程,确保多语言文档质量。
4
- type: flow
4
+ type: workflow
5
5
  domain: i18n
6
6
  version: 1.0.0
7
7
  ---