monoco-toolkit 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- monoco/__main__.py +8 -0
- monoco/core/artifacts/__init__.py +16 -0
- monoco/core/artifacts/manager.py +575 -0
- monoco/core/artifacts/models.py +161 -0
- monoco/core/automation/__init__.py +51 -0
- monoco/core/automation/config.py +338 -0
- monoco/core/automation/field_watcher.py +296 -0
- monoco/core/automation/handlers.py +723 -0
- monoco/core/config.py +31 -4
- monoco/core/executor/__init__.py +38 -0
- monoco/core/executor/agent_action.py +254 -0
- monoco/core/executor/git_action.py +303 -0
- monoco/core/executor/im_action.py +309 -0
- monoco/core/executor/pytest_action.py +218 -0
- monoco/core/git.py +38 -0
- monoco/core/hooks/context.py +74 -13
- monoco/core/ingestion/__init__.py +20 -0
- monoco/core/ingestion/discovery.py +248 -0
- monoco/core/ingestion/watcher.py +343 -0
- monoco/core/ingestion/worker.py +436 -0
- monoco/core/loader.py +633 -0
- monoco/core/registry.py +34 -25
- monoco/core/router/__init__.py +55 -0
- monoco/core/router/action.py +341 -0
- monoco/core/router/router.py +392 -0
- monoco/core/scheduler/__init__.py +63 -0
- monoco/core/scheduler/base.py +152 -0
- monoco/core/scheduler/engines.py +175 -0
- monoco/core/scheduler/events.py +171 -0
- monoco/core/scheduler/local.py +377 -0
- monoco/core/skills.py +119 -80
- monoco/core/watcher/__init__.py +57 -0
- monoco/core/watcher/base.py +365 -0
- monoco/core/watcher/dropzone.py +152 -0
- monoco/core/watcher/issue.py +303 -0
- monoco/core/watcher/memo.py +200 -0
- monoco/core/watcher/task.py +238 -0
- monoco/daemon/app.py +77 -1
- monoco/daemon/commands.py +10 -0
- monoco/daemon/events.py +34 -0
- monoco/daemon/mailroom_service.py +196 -0
- monoco/daemon/models.py +1 -0
- monoco/daemon/scheduler.py +207 -0
- monoco/daemon/services.py +27 -58
- monoco/daemon/triggers.py +55 -0
- monoco/features/agent/__init__.py +25 -7
- monoco/features/agent/adapter.py +17 -7
- monoco/features/agent/cli.py +91 -57
- monoco/features/agent/engines.py +31 -170
- monoco/{core/resources/en/skills/monoco_core → features/agent/resources/en/skills/monoco_atom_core}/SKILL.md +2 -2
- monoco/features/agent/resources/en/skills/{flow_engineer → monoco_workflow_agent_engineer}/SKILL.md +2 -2
- monoco/features/agent/resources/en/skills/{flow_manager → monoco_workflow_agent_manager}/SKILL.md +2 -2
- monoco/features/agent/resources/en/skills/{flow_planner → monoco_workflow_agent_planner}/SKILL.md +2 -2
- monoco/features/agent/resources/en/skills/{flow_reviewer → monoco_workflow_agent_reviewer}/SKILL.md +2 -2
- monoco/features/agent/resources/{roles/role-engineer.yaml → zh/roles/monoco_role_engineer.yaml} +3 -3
- monoco/features/agent/resources/{roles/role-manager.yaml → zh/roles/monoco_role_manager.yaml} +8 -8
- monoco/features/agent/resources/{roles/role-planner.yaml → zh/roles/monoco_role_planner.yaml} +8 -8
- monoco/features/agent/resources/{roles/role-reviewer.yaml → zh/roles/monoco_role_reviewer.yaml} +8 -8
- monoco/{core/resources/zh/skills/monoco_core → features/agent/resources/zh/skills/monoco_atom_core}/SKILL.md +2 -2
- monoco/features/agent/resources/zh/skills/{flow_engineer → monoco_workflow_agent_engineer}/SKILL.md +2 -2
- monoco/features/agent/resources/zh/skills/{flow_manager → monoco_workflow_agent_manager}/SKILL.md +2 -2
- monoco/features/agent/resources/zh/skills/{flow_planner → monoco_workflow_agent_planner}/SKILL.md +2 -2
- monoco/features/agent/resources/zh/skills/{flow_reviewer → monoco_workflow_agent_reviewer}/SKILL.md +2 -2
- monoco/features/agent/worker.py +1 -1
- monoco/features/artifact/__init__.py +0 -0
- monoco/features/artifact/adapter.py +33 -0
- monoco/features/artifact/resources/zh/AGENTS.md +14 -0
- monoco/features/artifact/resources/zh/skills/monoco_atom_artifact/SKILL.md +278 -0
- monoco/features/glossary/adapter.py +18 -7
- monoco/features/glossary/resources/en/skills/{monoco_glossary → monoco_atom_glossary}/SKILL.md +2 -2
- monoco/features/glossary/resources/zh/skills/{monoco_glossary → monoco_atom_glossary}/SKILL.md +2 -2
- monoco/features/hooks/__init__.py +11 -0
- monoco/features/hooks/adapter.py +67 -0
- monoco/features/hooks/commands.py +309 -0
- monoco/features/hooks/core.py +441 -0
- monoco/features/hooks/resources/ADDING_HOOKS.md +234 -0
- monoco/features/i18n/adapter.py +18 -5
- monoco/features/i18n/core.py +482 -17
- monoco/features/i18n/resources/en/skills/{monoco_i18n → monoco_atom_i18n}/SKILL.md +2 -2
- monoco/features/i18n/resources/en/skills/{i18n_scan_workflow → monoco_workflow_i18n_scan}/SKILL.md +2 -2
- monoco/features/i18n/resources/zh/skills/{monoco_i18n → monoco_atom_i18n}/SKILL.md +2 -2
- monoco/features/i18n/resources/zh/skills/{i18n_scan_workflow → monoco_workflow_i18n_scan}/SKILL.md +2 -2
- monoco/features/issue/adapter.py +19 -6
- monoco/features/issue/commands.py +352 -20
- monoco/features/issue/core.py +475 -16
- monoco/features/issue/engine/machine.py +114 -4
- monoco/features/issue/linter.py +60 -5
- monoco/features/issue/models.py +2 -2
- monoco/features/issue/resources/en/AGENTS.md +109 -0
- monoco/features/issue/resources/en/skills/{monoco_issue → monoco_atom_issue}/SKILL.md +2 -2
- monoco/features/issue/resources/en/skills/{issue_create_workflow → monoco_workflow_issue_creation}/SKILL.md +2 -2
- monoco/features/issue/resources/en/skills/{issue_develop_workflow → monoco_workflow_issue_development}/SKILL.md +2 -2
- monoco/features/issue/resources/en/skills/{issue_lifecycle_workflow → monoco_workflow_issue_management}/SKILL.md +2 -2
- monoco/features/issue/resources/en/skills/{issue_refine_workflow → monoco_workflow_issue_refinement}/SKILL.md +2 -2
- monoco/features/issue/resources/hooks/post-checkout.sh +39 -0
- monoco/features/issue/resources/hooks/pre-commit.sh +41 -0
- monoco/features/issue/resources/hooks/pre-push.sh +35 -0
- monoco/features/issue/resources/zh/AGENTS.md +109 -0
- monoco/features/issue/resources/zh/skills/{monoco_issue → monoco_atom_issue_lifecycle}/SKILL.md +2 -2
- monoco/features/issue/resources/zh/skills/{issue_create_workflow → monoco_workflow_issue_creation}/SKILL.md +2 -2
- monoco/features/issue/resources/zh/skills/{issue_develop_workflow → monoco_workflow_issue_development}/SKILL.md +2 -2
- monoco/features/issue/resources/zh/skills/{issue_lifecycle_workflow → monoco_workflow_issue_management}/SKILL.md +2 -2
- monoco/features/issue/resources/zh/skills/{issue_refine_workflow → monoco_workflow_issue_refinement}/SKILL.md +2 -2
- monoco/features/issue/validator.py +101 -1
- monoco/features/memo/adapter.py +21 -8
- monoco/features/memo/cli.py +103 -10
- monoco/features/memo/core.py +178 -92
- monoco/features/memo/models.py +53 -0
- monoco/features/memo/resources/en/skills/{monoco_memo → monoco_atom_memo}/SKILL.md +2 -2
- monoco/features/memo/resources/en/skills/{note_processing_workflow → monoco_workflow_note_processing}/SKILL.md +2 -2
- monoco/features/memo/resources/zh/skills/{monoco_memo → monoco_atom_memo}/SKILL.md +2 -2
- monoco/features/memo/resources/zh/skills/{note_processing_workflow → monoco_workflow_note_processing}/SKILL.md +2 -2
- monoco/features/spike/adapter.py +18 -5
- monoco/features/spike/commands.py +5 -3
- monoco/features/spike/resources/en/skills/{monoco_spike → monoco_atom_spike}/SKILL.md +2 -2
- monoco/features/spike/resources/en/skills/{research_workflow → monoco_workflow_research}/SKILL.md +2 -2
- monoco/features/spike/resources/zh/skills/{monoco_spike → monoco_atom_spike}/SKILL.md +2 -2
- monoco/features/spike/resources/zh/skills/{research_workflow → monoco_workflow_research}/SKILL.md +2 -2
- monoco/main.py +38 -1
- {monoco_toolkit-0.3.10.dist-info → monoco_toolkit-0.3.12.dist-info}/METADATA +7 -1
- monoco_toolkit-0.3.12.dist-info/RECORD +202 -0
- monoco/features/agent/apoptosis.py +0 -44
- monoco/features/agent/manager.py +0 -91
- monoco/features/agent/session.py +0 -121
- monoco_toolkit-0.3.10.dist-info/RECORD +0 -156
- /monoco/{core → features/agent}/resources/en/AGENTS.md +0 -0
- /monoco/{core → features/agent}/resources/zh/AGENTS.md +0 -0
- {monoco_toolkit-0.3.10.dist-info → monoco_toolkit-0.3.12.dist-info}/WHEEL +0 -0
- {monoco_toolkit-0.3.10.dist-info → monoco_toolkit-0.3.12.dist-info}/entry_points.txt +0 -0
- {monoco_toolkit-0.3.10.dist-info → monoco_toolkit-0.3.12.dist-info}/licenses/LICENSE +0 -0
monoco/features/i18n/core.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import fnmatch
|
|
2
2
|
from pathlib import Path
|
|
3
|
-
from typing import List, Optional
|
|
3
|
+
from typing import List, Optional, Tuple
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from dataclasses import dataclass
|
|
4
6
|
import re
|
|
5
7
|
|
|
6
8
|
DEFAULT_EXCLUDES = [
|
|
@@ -214,43 +216,506 @@ def check_translation_exists(
|
|
|
214
216
|
return missing
|
|
215
217
|
|
|
216
218
|
|
|
219
|
+
# Common technical terms that should not count as "English words" when
# detecting the language of Chinese documents.
#
# Entries are lowercase; detect_language() lowercases each extracted word
# before looking it up here. Every term is listed exactly once (under the
# first category it belongs to).
#
# NOTE(review): detect_language() extracts words with
# r"\b[a-zA-Z][a-zA-Z0-9]*\b", so entries containing "-", "+", "#" or
# starting with a digit (e.g. "c++", "cherry-pick", "7z") do not appear to be
# matchable by that lookup. They are kept for any other consumers - confirm.
TECHNICAL_TERMS_ALLOWLIST = {
    # CLI/Shell
    "cli", "api", "ui", "ux", "gui", "shell", "bash", "zsh", "sh",
    "cmd", "powershell", "terminal", "console", "prompt",
    # Cloud/Container
    "kubernetes", "k8s", "docker", "container", "pod", "cluster", "node",
    "namespace", "ingress", "service", "deployment", "helm", "kubectl",
    "aws", "gcp", "azure", "cloud", "serverless", "lambda", "ec2", "s3",
    # DevOps/CI/CD
    "ci", "cd", "cicd", "pipeline", "jenkins", "gitlab", "github", "git",
    "svn", "mercurial", "hg", "commit", "branch", "merge", "rebase", "tag",
    "hook", "action", "workflow", "artifact", "build", "deploy", "release",
    # Programming Languages
    "python", "javascript", "js", "typescript", "ts", "java", "kotlin",
    "scala", "groovy", "ruby", "go", "golang", "rust", "c", "cpp", "c++",
    "csharp", "c#", "php", "perl", "lua", "swift", "objc", "objective-c",
    "r", "matlab", "julia", "dart", "flutter", "elixir", "erlang", "haskell",
    "clojure", "lisp", "scheme", "racket", "fsharp", "f#", "vb", "vba",
    # Web/Frameworks
    "html", "css", "scss", "sass", "less", "xml", "json", "yaml", "yml",
    "toml", "ini", "csv", "tsv", "markdown", "md", "rst", "asciidoc",
    "react", "vue", "angular", "svelte", "nextjs", "nuxt", "django",
    "flask", "fastapi", "tornado", "express", "koa", "nestjs", "spring",
    "rails", "laravel", "symfony", "dotnet", "aspnet", "mvc", "mvvm",
    # Databases
    "sql", "nosql", "mysql", "postgresql", "postgres", "sqlite", "oracle",
    "mssql", "sqlserver", "mongodb", "mongo", "redis", "cassandra",
    "dynamodb", "firebase", "elasticsearch", "solr", "neo4j", "graphql",
    # Testing
    "test", "testing", "unittest", "pytest", "jest", "mocha", "jasmine",
    "cypress", "selenium", "cucumber", "bdd", "tdd", "mock", "stub",
    "fixture", "assertion", "coverage", "benchmark", "profiling",
    # Architecture/Patterns
    "microservice", "microservices", "monolith", "server", "client",
    "frontend", "backend", "fullstack", "api-gateway", "load-balancer",
    "proxy", "cache", "cdn", "dns", "http", "https", "tcp", "udp",
    "websocket", "grpc", "rest", "soap", "oauth", "jwt",
    "sso", "ldap", "auth", "authentication", "authorization",
    # OS/Platform
    "linux", "ubuntu", "debian", "centos", "rhel", "fedora", "arch",
    "alpine", "windows", "macos", "darwin", "ios", "android",
    "unix", "posix", "kernel", "systemd", "init", "daemon",
    # Tools/IDE
    "vscode", "idea", "pycharm", "webstorm", "vim", "neovim", "nvim",
    "emacs", "sublime", "atom", "eclipse", "netbeans", "xcode",
    "docker-compose", "dockerfile", "makefile", "cmake", "gradle",
    "maven", "npm", "yarn", "pnpm", "pip", "conda", "venv", "virtualenv",
    # AI/ML
    "ai", "ml", "dl", "llm", "nlp", "cv", "neural", "network",
    "tensorflow", "pytorch", "keras", "scikit", "sklearn", "pandas",
    "numpy", "scipy", "matplotlib", "seaborn", "jupyter", "notebook",
    "training", "inference", "model", "dataset", "vector", "embedding",
    # Security
    "security", "vulnerability", "exploit", "cve", "xss", "csrf",
    "injection", "encryption", "decryption", "hash", "signature",
    "certificate", "ssl", "tls", "firewall", "vpn",
    # Monitoring/Observability
    "log", "logging", "metrics", "tracing", "observability", "monitoring",
    "alert", "dashboard", "grafana", "prometheus", "elk", "splunk",
    "datadog", "newrelic", "sentry", "bugsnag", "rollbar",
    # Agile/Project Management
    "agile", "scrum", "kanban", "sprint", "backlog", "epic", "story",
    "task", "issue", "ticket", "bug", "feature", "milestone", "roadmap",
    "retro", "standup", "review", "demo", "po", "sm", "pm",
    # Misc Tech Terms
    "id", "uuid", "guid", "url", "uri", "ip", "ipv4", "ipv6",
    "mac", "hostname", "domain", "subdomain", "path", "query",
    "header", "body", "payload", "request", "response", "status",
    "error", "exception", "warning", "info", "debug", "trace",
    "config", "configuration", "setting", "option", "flag", "env",
    "variable", "constant", "literal", "expression", "statement",
    "function", "method", "class", "object", "instance", "interface",
    "abstract", "virtual", "override", "inherit", "extend", "implement",
    "import", "export", "module", "package", "library", "framework",
    "sdk", "toolkit", "runtime", "compiler", "interpreter", "vm",
    "version", "changelog", "license", "copyright",
    "repo", "repository", "fork", "clone", "pull", "push", "fetch",
    "upstream", "origin", "remote", "local", "stash", "stage",
    "index", "working", "tree", "head", "detached", "orphan",
    "squash", "amend", "cherry-pick", "revert", "reset", "clean",
    "linter", "formatter", "parser", "lexer", "ast", "ir",
    "bytecode", "opcode", "assembly", "binary", "executable",
    "static", "dynamic", "linking", "compilation", "transpilation",
    "minification", "bundling", "tree-shaking", "code-splitting",
    "hot-reload", "hot-restart", "live-reload", "watch", "watchman",
    "polyfill", "shim", "ponyfill", "fallback", "graceful",
    "async", "sync", "parallel", "concurrent", "sequential",
    "blocking", "non-blocking", "io", "nio", "epoll", "kqueue",
    "thread", "process", "coroutine", "fiber", "goroutine",
    "mutex", "lock", "semaphore", "channel", "queue", "stack",
    "heap", "gc", "garbage", "collection", "memory", "leak",
    "buffer", "stream", "pipe", "redirect", "tee", "cat",
    "grep", "awk", "sed", "cut", "sort", "uniq", "wc", "tail",
    "find", "locate", "which", "whereis", "type", "alias",
    "source", "printenv", "set", "unset",
    "chmod", "chown", "chgrp", "umask", "sudo", "su",
    "ssh", "scp", "sftp", "rsync", "ftp", "telnet", "nc",
    "ping", "traceroute", "netstat", "ss", "lsof", "fuser",
    "ps", "top", "htop", "kill", "pkill", "killall", "nice",
    "cron", "at", "batch", "systemctl",
    "mount", "umount", "df", "du", "fsck", "mkfs", "fdisk",
    "parted", "lsblk", "blkid", "uuidgen", "tune2fs",
    "tar", "gzip", "gunzip", "zip", "unzip", "bz2", "xz",
    "7z", "rar", "archive", "compress", "decompress", "extract",
    "curl", "wget", "httpie", "postman", "insomnia",
    "nginx", "apache", "httpd", "tomcat", "jetty", "undertow",
    "haproxy", "traefik", "envoy", "istio", "linkerd",
    "rabbitmq", "kafka", "mqtt", "amqp", "stomp", "zeromq",
    "memcached", "etcd", "consul", "vault", "zookeeper",
    "jaeger", "zipkin", "opentelemetry",
    "ansible", "puppet", "chef", "saltstack", "terraform",
    "pulumi", "vagrant", "packer", "nomad", "consul-template",
    "github-actions", "gitlab-ci", "travis", "circleci",
    "teamcity", "bamboo", "drone", "argo", "tekton", "spinnaker",
    "sonarqube", "nexus", "artifactory", "harbor", "chartmuseum",
    "loki", "fluentd", "fluent-bit", "filebeat",
    "telegraf", "influxdb", "timescaledb", "promscale",
    "minio", "ceph", "glusterfs", "nfs", "smb", "cifs",
    "wireguard", "openvpn", "ipsec", "ssl-vpn",
    "waf", "ids", "ips", "siem", "soar", "xdr", "edr",
    "ad", "saml", "oauth2", "openid", "oidc",
    "mfa", "2fa", "totp", "hotp", "u2f", "webauthn", "fido",
    "aes", "rsa", "ecc", "dsa", "ecdsa", "ed25519", "curve25519",
    "sha", "md5", "bcrypt", "scrypt", "argon2", "pbkdf2",
    "hmac", "cmac", "gcm", "cbc", "ecb", "ctr", "ofb", "cfb",
    "x509", "csr", "crt", "pem", "der", "p12", "pfx",
    "acme", "letsencrypt", "certbot", "caddy",
    "wasm", "webassembly", "wasmer", "wasmtime", "wasi",
    "pwa", "spa", "mpa", "ssr", "ssg", "isr",
    "amp", "instant", "turbo", "stimulus", "htmx",
    "webcomponents", "shadow", "dom", "custom", "elements",
    "service-worker", "manifest", "offline",
    "webrtc", "sse", "eventsource", "polling",
    "subscription", "mutation", "schema",
    "resolver", "directive", "fragment", "union",
    "prisma", "sequelize", "typeorm", "sqlalchemy", "orm",
    "migration", "seed", "factory",
    "faker", "factory-boy", "hypothesis", "property-based",
    "snapshot", "visual", "regression", "e2e", "integration",
    "unit", "functional", "acceptance", "performance", "load",
    "stress", "chaos", "contract", "pact", "consumer", "provider",
    "atdd", "sbe", "example", "specification",
    "given", "when", "then", "scenario", "background",
    "behave", "specflow", "gauge", "relish",
    "allure", "reportportal", "xunit", "nunit", "mstest",
    "sonar", "coveralls", "codecov", "codeclimate", "codacy",
    "deepsource", "snyk", "whitesource", "blackduck", "fossa",
    "dependabot", "renovate", "greenkeeper",
    "pre-commit", "husky", "lint-staged", "commitlint",
    "semantic-release", "standard-version", "conventional",
    "commitizen", "cz",
    "monoco", "kimi", "claude", "gemini", "qwen", "gpt",
}
|
|
374
|
+
|
|
375
|
+
|
|
217
376
|
def detect_language(content: str) -> str:
    """
    Detect the language of the content using improved heuristics.

    This function is designed to handle technical documents with mixed
    Chinese and English content, especially for IT/Software development
    topics. Before measuring anything it strips YAML frontmatter, fenced
    code blocks, inline code, URLs and issue IDs, because those are
    language-neutral and would otherwise bias the result towards English.

    Args:
        content: Raw Markdown text.

    Returns: 'zh', 'en', or 'unknown'
    """
    if not content:
        return "unknown"

    # Strip YAML frontmatter if present (only matches at the very start).
    content = re.sub(r"^---\n.*?\n---\n", "", content, flags=re.DOTALL)
    if not content.strip():
        return "unknown"

    # Applied in order: fenced code, inline code, URLs, issue IDs.
    # The URL pattern previously ended with a stray "|" (an empty
    # alternative that matches at every position); it is dropped here,
    # which leaves the substitution result unchanged.
    cleanup_patterns = (
        r"```[\s\S]*?```",
        r"`[^`]+`",
        r"https?://\S+|www\.\S+",
        r"\b(EPIC|FEAT|CHORE|FIX)-\d{4}\b",
    )
    content_clean = content
    for pattern in cleanup_patterns:
        content_clean = re.sub(pattern, "", content_clean)

    if not content_clean.strip():
        # If after cleaning there's nothing left, it was likely all code/IDs.
        return "unknown"

    # Non-empty after strip(), so the length is always > 0 below.
    total_chars = len(content_clean)

    # Only the CJK Unified Ideographs range U+4E00..U+9FFF is counted.
    cjk_count = sum(1 for c in content_clean if "\u4e00" <= c <= "\u9fff")
    cjk_ratio = cjk_count / total_chars

    # Heuristic 1: more than 3% CJK characters -> Chinese document.
    if cjk_ratio > 0.03:
        return "zh"

    # ASCII-letter-led alphanumeric words.
    # NOTE(review): for str patterns "\b" is Unicode-aware, so a Latin word
    # glued directly to CJK characters (no space) is not matched here.
    words = re.findall(r"\b[a-zA-Z][a-zA-Z0-9]*\b", content_clean)
    total_words = len(words)

    # Words found in the allowlist (case-insensitive) are language-neutral.
    technical_term_count = sum(
        1 for word in words if word.lower() in TECHNICAL_TERMS_ALLOWLIST
    )

    # Heuristic 2: some CJK (>1%) plus English technical terms -> treat as a
    # technical Chinese document.
    if cjk_ratio > 0.01 and technical_term_count > 0:
        return "zh"

    # Heuristic 3: mostly-ASCII text with at least 10 non-technical English
    # words is English.
    non_ascii_non_cjk = sum(
        1
        for c in content_clean
        if ord(c) > 127 and not ("\u4e00" <= c <= "\u9fff")
    )
    non_ascii_ratio = non_ascii_non_cjk / total_chars
    non_technical_words = total_words - technical_term_count
    if non_ascii_ratio < 0.15 and non_technical_words >= 10:
        return "en"

    # Heuristic 4: many English-looking words with almost no CJK.
    if cjk_ratio < 0.01 and total_words > 20:
        return "en"

    return "unknown"
|
|
252
465
|
|
|
253
466
|
|
|
467
|
+
class BlockType(Enum):
    """Types of content blocks in Markdown."""

    HEADING = "heading"
    PARAGRAPH = "paragraph"
    CODE_BLOCK = "code_block"
    LIST_ITEM = "list_item"
    QUOTE = "quote"
    TABLE = "table"
    EMPTY = "empty"
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
@dataclass
class ContentBlock:
    """Represents a block of content with its type and language info."""

    # Kind of Markdown construct this block was parsed as.
    type: BlockType
    # Raw text of the block; multi-line blocks are joined with "\n".
    content: str
    # 0-based index of the block's first line, as produced by
    # parse_markdown_blocks() (counted after frontmatter has been stripped).
    line_start: int
    # Index one past the block's last line (end-exclusive).
    line_end: int
    # 'zh', 'en' or 'unknown'; stays 'unknown' until detection has run.
    detected_lang: str = "unknown"
    # True when the block is excluded from language consistency checks.
    should_skip: bool = False
|
|
487
|
+
|
|
488
|
+
|
|
489
|
+
def parse_markdown_blocks(content: str) -> List[ContentBlock]:
    """
    Parse markdown content into blocks for language detection.

    This function respects block boundaries like:
    - Code blocks (```...```)
    - Headings (# ...)
    - Paragraphs
    - List items
    - Quotes and table rows

    YAML frontmatter is stripped first, so ``line_start`` / ``line_end`` are
    0-based indexes into the frontmatter-free text (``line_end`` is
    exclusive), not into the original input.

    Args:
        content: Raw Markdown text.

    Returns a list of ContentBlock objects in document order.
    """
    # Compiled once instead of being re-matched from source on every line.
    frontmatter_pattern = re.compile(r"^---\n.*?\n---\n", re.DOTALL)
    heading_pattern = re.compile(r"^#{1,6}\s")
    bullet_pattern = re.compile(r"^\s*[-*+]\s")
    ordered_pattern = re.compile(r"^\s*\d+\.\s")

    lines = frontmatter_pattern.sub("", content).splitlines()

    blocks: List[ContentBlock] = []
    current_block_lines: List[str] = []
    current_block_type = BlockType.PARAGRAPH
    current_start_line = 0
    in_code_block = False

    def flush_block() -> None:
        """Emit the accumulated lines (if any) as one block and reset them."""
        if not current_block_lines:
            return
        blocks.append(
            ContentBlock(
                type=current_block_type,
                content="\n".join(current_block_lines),
                line_start=current_start_line,
                line_end=current_start_line + len(current_block_lines),
            )
        )
        # Cleared in place so the enclosing loop keeps using the same list.
        current_block_lines.clear()

    for i, line in enumerate(lines):
        stripped = line.strip()

        # Code fence: toggles code-block mode; fence lines belong to the block.
        if stripped.startswith("```"):
            if not in_code_block:
                flush_block()
                in_code_block = True
                current_block_type = BlockType.CODE_BLOCK
                current_start_line = i
                current_block_lines.append(line)
            else:
                current_block_lines.append(line)
                flush_block()
                in_code_block = False
                current_block_type = BlockType.PARAGRAPH
            continue

        # Inside a fence everything is code, whatever it looks like.
        if in_code_block:
            current_block_lines.append(line)
            continue

        # Heading: always a single-line block of its own.
        if heading_pattern.match(line):
            flush_block()
            blocks.append(
                ContentBlock(
                    type=BlockType.HEADING,
                    content=line,
                    line_start=i,
                    line_end=i + 1,
                )
            )
            current_start_line = i + 1
            current_block_type = BlockType.PARAGRAPH
            continue

        # Empty line: terminates the current block and is recorded itself.
        if not stripped:
            flush_block()
            blocks.append(
                ContentBlock(
                    type=BlockType.EMPTY,
                    content="",
                    line_start=i,
                    line_end=i + 1,
                )
            )
            current_start_line = i + 1
            current_block_type = BlockType.PARAGRAPH
            continue

        # List item (bulleted or numbered): each marker starts a new block.
        if bullet_pattern.match(line) or ordered_pattern.match(line):
            flush_block()
            current_block_type = BlockType.LIST_ITEM
            current_start_line = i
            current_block_lines.append(line)
            continue

        # Quote: each ">" line starts a new block.
        if stripped.startswith(">"):
            flush_block()
            current_block_type = BlockType.QUOTE
            current_start_line = i
            current_block_lines.append(line)
            continue

        # Table row: consecutive rows are merged into one TABLE block.
        if "|" in line and not stripped.startswith("#"):
            if current_block_type != BlockType.TABLE:
                flush_block()
                current_block_type = BlockType.TABLE
                current_start_line = i
            current_block_lines.append(line)
            continue

        # Default: continuation of whatever block is currently open
        # (a new paragraph if nothing is pending).
        if not current_block_lines:
            current_start_line = i
        current_block_lines.append(line)

    # Flush remaining lines (also covers an unterminated code fence).
    flush_block()

    return blocks
|
|
610
|
+
|
|
611
|
+
|
|
612
|
+
def should_skip_block_for_language_check(
    block: ContentBlock,
    all_blocks: List[ContentBlock],
    block_index: int,
    source_lang: str = "zh",
) -> bool:
    """
    Determine if a block should be skipped during language consistency checks.

    Design Principle:
    - Narrative text should be in the source language (e.g., Chinese)
    - English should only appear as isolated nouns (technical terms,
      filenames, code blocks)

    Reasons to skip:
    1. Code blocks (always contain English keywords)
    2. Empty blocks
    3. Blocks with only technical terms/IDs/filenames

    Args:
        block: The block being examined.
        all_blocks: All blocks of the document. Currently unused; kept for
            interface compatibility.
        block_index: Position of ``block`` in ``all_blocks``. Currently
            unused.
        source_lang: Expected source language. Currently unused.

    Returns:
        True when the block carries no narrative text worth checking.
    """
    # Code and empty blocks never carry narrative text.
    if block.type in (BlockType.CODE_BLOCK, BlockType.EMPTY):
        return True

    cleaned = block.content.strip()
    if not cleaned:
        return True

    # Strip, in order: inline code, URLs, issue IDs, then dotted tokens that
    # look like file names (e.g. "core.py").
    non_language_patterns = (
        r"`[^`]+`",
        r"https?://\S+|www\.\S+",
        r"\b(EPIC|FEAT|CHORE|FIX)-\d{4}\b",
        r"[\w\-]+\.[\w\-]+",
    )
    for pattern in non_language_patterns:
        cleaned = re.sub(pattern, "", cleaned)

    # Nothing left means the block held only technical content.
    return not cleaned.strip()
|
|
658
|
+
|
|
659
|
+
|
|
660
|
+
def detect_language_blocks(content: str, source_lang: str = "zh") -> List[ContentBlock]:
    """
    Detect language for each block in the content.

    This provides block-level language detection that respects:
    - Code blocks (skipped)
    - Technical terms (handled by detect_language)
    - Paragraph boundaries

    Design Principle:
    - Narrative text should be in the source language
    - English should only appear as isolated nouns

    Args:
        content: Raw Markdown text.
        source_lang: Expected source language, forwarded to the skip check.

    Returns a list of ContentBlock objects with ``should_skip`` and
    ``detected_lang`` filled in; skipped blocks keep ``"unknown"``.
    """
    blocks = parse_markdown_blocks(content)

    for index, block in enumerate(blocks):
        block.should_skip = should_skip_block_for_language_check(
            block, blocks, index, source_lang
        )
        # Skipped blocks are explicitly marked as undetermined.
        block.detected_lang = (
            "unknown" if block.should_skip else detect_language(block.content)
        )

    return blocks
|
|
691
|
+
|
|
692
|
+
|
|
693
|
+
def has_language_mismatch_blocks(content: str, source_lang: str = "zh") -> Tuple[bool, List[ContentBlock]]:
    """
    Check if content has language mismatches at block level.

    A block is a mismatch when it was detected as English in a Chinese
    ("zh"/"cn") source, or as Chinese in an English ("en") source. Skipped
    blocks and blocks whose language is "unknown" never count. For any other
    ``source_lang`` no block is reported.

    Returns:
        (has_mismatch, mismatched_blocks)
        - has_mismatch: True if any non-skipped block has mismatched language
        - mismatched_blocks: List of blocks that don't match source language
    """
    blocks = detect_language_blocks(content, source_lang)

    # Resolve once which detected language contradicts the source language,
    # instead of re-evaluating source_lang for every block.
    normalized = source_lang.lower()
    if normalized in ("zh", "cn"):
        conflicting_lang = "en"
    elif normalized == "en":
        conflicting_lang = "zh"
    else:
        return False, []

    mismatched = [
        block
        for block in blocks
        if not block.should_skip and block.detected_lang == conflicting_lang
    ]

    return len(mismatched) > 0, mismatched
|
|
717
|
+
|
|
718
|
+
|
|
254
719
|
def is_content_source_language(path: Path, source_lang: str = "en") -> bool:
|
|
255
720
|
"""
|
|
256
721
|
Check if file content appears to be in the source language.
|
monoco/features/i18n/resources/en/skills/{i18n_scan_workflow → monoco_workflow_i18n_scan}/SKILL.md
RENAMED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
|
-
name:
|
|
2
|
+
name: monoco_workflow_i18n_scan
|
|
3
3
|
description: I18n Scan Workflow (Flow Skill). Defines the standard operational process from scanning missing translations to generating translation tasks, ensuring multilingual documentation quality.
|
|
4
|
-
type:
|
|
4
|
+
type: workflow
|
|
5
5
|
domain: i18n
|
|
6
6
|
version: 1.0.0
|
|
7
7
|
---
|
monoco/features/issue/adapter.py
CHANGED
|
@@ -1,19 +1,32 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
2
|
from typing import Dict
|
|
3
|
-
from monoco.core.
|
|
3
|
+
from monoco.core.loader import FeatureModule, FeatureMetadata
|
|
4
|
+
from monoco.core.feature import IntegrationData
|
|
4
5
|
from monoco.features.issue import core
|
|
5
6
|
|
|
6
7
|
|
|
7
|
-
class IssueFeature(
|
|
8
|
-
|
|
9
|
-
def name(self) -> str:
|
|
10
|
-
return "issue"
|
|
8
|
+
class IssueFeature(FeatureModule):
|
|
9
|
+
"""Issue management feature module with unified lifecycle support."""
|
|
11
10
|
|
|
12
|
-
|
|
11
|
+
@property
|
|
12
|
+
def metadata(self) -> FeatureMetadata:
|
|
13
|
+
return FeatureMetadata(
|
|
14
|
+
name="issue",
|
|
15
|
+
version="1.0.0",
|
|
16
|
+
description="Issue management system for Monoco",
|
|
17
|
+
dependencies=["core"],
|
|
18
|
+
priority=10, # High priority - load early
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
def _on_mount(self, context: "FeatureContext") -> None: # type: ignore
|
|
22
|
+
"""Initialize issue feature with workspace context."""
|
|
23
|
+
root = context.root
|
|
24
|
+
config = context.config
|
|
13
25
|
issues_path = root / config.get("paths", {}).get("issues", "Issues")
|
|
14
26
|
core.init(issues_path)
|
|
15
27
|
|
|
16
28
|
def integrate(self, root: Path, config: Dict) -> IntegrationData:
|
|
29
|
+
"""Provide integration data for agent environment."""
|
|
17
30
|
# Determine language from config, default to 'en'
|
|
18
31
|
lang = config.get("i18n", {}).get("source_lang", "en")
|
|
19
32
|
|