monoco-toolkit 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. monoco/cli/project.py +15 -7
  2. monoco/cli/workspace.py +11 -3
  3. monoco/core/agent/adapters.py +24 -1
  4. monoco/core/config.py +81 -3
  5. monoco/core/integrations.py +8 -0
  6. monoco/core/lsp.py +7 -0
  7. monoco/core/output.py +8 -1
  8. monoco/core/resources/en/SKILL.md +1 -1
  9. monoco/core/setup.py +8 -1
  10. monoco/daemon/app.py +18 -12
  11. monoco/features/agent/commands.py +94 -17
  12. monoco/features/agent/core.py +48 -0
  13. monoco/features/agent/resources/en/critique.prompty +16 -0
  14. monoco/features/agent/resources/en/develop.prompty +16 -0
  15. monoco/features/agent/resources/en/investigate.prompty +16 -0
  16. monoco/features/agent/resources/en/refine.prompty +14 -0
  17. monoco/features/agent/resources/en/verify.prompty +16 -0
  18. monoco/features/agent/resources/zh/critique.prompty +18 -0
  19. monoco/features/agent/resources/zh/develop.prompty +18 -0
  20. monoco/features/agent/resources/zh/investigate.prompty +18 -0
  21. monoco/features/agent/resources/zh/refine.prompty +16 -0
  22. monoco/features/agent/resources/zh/verify.prompty +18 -0
  23. monoco/features/config/commands.py +35 -14
  24. monoco/features/i18n/commands.py +89 -10
  25. monoco/features/i18n/core.py +112 -16
  26. monoco/features/issue/commands.py +254 -85
  27. monoco/features/issue/core.py +142 -119
  28. monoco/features/issue/domain/__init__.py +0 -0
  29. monoco/features/issue/domain/lifecycle.py +126 -0
  30. monoco/features/issue/domain/models.py +170 -0
  31. monoco/features/issue/domain/parser.py +223 -0
  32. monoco/features/issue/domain/workspace.py +104 -0
  33. monoco/features/issue/engine/__init__.py +22 -0
  34. monoco/features/issue/engine/config.py +189 -0
  35. monoco/features/issue/engine/machine.py +185 -0
  36. monoco/features/issue/engine/models.py +18 -0
  37. monoco/features/issue/linter.py +32 -11
  38. monoco/features/issue/lsp/__init__.py +3 -0
  39. monoco/features/issue/lsp/definition.py +72 -0
  40. monoco/features/issue/models.py +8 -8
  41. monoco/features/issue/validator.py +204 -65
  42. monoco/features/spike/commands.py +45 -24
  43. monoco/features/spike/core.py +5 -22
  44. monoco/main.py +11 -17
  45. {monoco_toolkit-0.2.4.dist-info → monoco_toolkit-0.2.6.dist-info}/METADATA +1 -1
  46. monoco_toolkit-0.2.6.dist-info/RECORD +96 -0
  47. monoco/features/issue/executions/refine.md +0 -26
  48. monoco/features/pty/core.py +0 -185
  49. monoco/features/pty/router.py +0 -138
  50. monoco/features/pty/server.py +0 -56
  51. monoco_toolkit-0.2.4.dist-info/RECORD +0 -78
  52. {monoco_toolkit-0.2.4.dist-info → monoco_toolkit-0.2.6.dist-info}/WHEEL +0 -0
  53. {monoco_toolkit-0.2.4.dist-info → monoco_toolkit-0.2.6.dist-info}/entry_points.txt +0 -0
  54. {monoco_toolkit-0.2.4.dist-info → monoco_toolkit-0.2.6.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,16 @@
1
+ ---
2
+ name: critique
3
+ description: Critique the code or design against requirements.
4
+ provider: chat
5
+ ---
6
+ You are a Principal Code Reviewer.
7
+ Your task is to critique the following implementation/design:
8
+
9
+ {{file}}
10
+
11
+ # Instructions
12
+ 1. **Gap Analysis**: Check if all Acceptance Criteria are met.
13
+ 2. **Code Quality**: Identify potential bugs, security issues, or performance bottlenecks.
14
+ 3. **Design Patterns**: Suggest better patterns if applicable.
15
+
16
+ Output a structured review.
@@ -0,0 +1,16 @@
1
+ ---
2
+ name: develop
3
+ description: Execute the core development task: coding and unit testing.
4
+ provider: chat
5
+ ---
6
+ You are a Principal Software Engineer.
7
+ Your task is to implement the feature described in this Issue:
8
+
9
+ {{file}}
10
+
11
+ # Instructions
12
+ 1. **Implementation**: Write the code required to satisfy the Acceptance Criteria.
13
+ 2. **Unit Tests**: Add or update unit tests to verify your changes.
14
+ 3. **Linting**: Ensure code follows project style guidelines.
15
+
16
+ Please provide the code changes.
@@ -0,0 +1,16 @@
1
+ ---
2
+ name: investigate
3
+ description: Scan the codebase to identify architectural design and business constraints, and enrich the Issue reference network.
4
+ provider: chat
5
+ ---
6
+ You are a Senior Software Architect.
7
+ Your task is to investigate the feasibility and impact of the following Issue:
8
+
9
+ {{file}}
10
+
11
+ # Instructions
12
+ 1. **Architecture Scan**: Identify which modules/files need to be modified.
13
+ 2. **Dependency Analysis**: Suggest `parent`, `dependencies`, and `related` issues.
14
+ 3. **Constraint Check**: Identify any business rules or technical constraints (e.g., Billing module impact).
15
+
16
+ Output your findings as a comment or update the Issue description directly.
@@ -0,0 +1,14 @@
1
+ ---
2
+ name: refine
3
+ description: Refine requirements, turning a Draft into an executable Plan.
4
+ provider: chat
5
+ ---
6
+ You are a Product Owner / Systems Analyst.
7
+ Refine the following Draft Issue into a concrete Technical Plan:
8
+
9
+ {{file}}
10
+
11
+ # Instructions
12
+ 1. **Clarify Objective**: Ensure the goal is specific and measurable.
13
+ 2. **Expand Criteria**: Add detailed Acceptance Criteria.
14
+ 3. **Breakdown Tasks**: List concrete Technical Tasks.
@@ -0,0 +1,16 @@
1
+ ---
2
+ name: verify
3
+ description: Verify issue completeness, code quality, and logical consistency on the user's behalf.
4
+ provider: chat
5
+ ---
6
+ You are a QA Lead / Release Manager.
7
+ Your task is to verify if the following Issue is ready for delivery:
8
+
9
+ {{file}}
10
+
11
+ # Instructions
12
+ 1. **Definition of Done**: Check if all Technical Tasks are checked and Acceptance Criteria met.
13
+ 2. **Quality Check**: Review the code changes (if provided in context) for bugs or antipatterns.
14
+ 3. **Completeness**: Ensure documentation and tests are present.
15
+
16
+ Output "PASS" if ready, or a list of "BLOCKERS" if not.
@@ -0,0 +1,18 @@
1
+ ---
2
+ name: critique
3
+ description: 对当前实现或设计进行批判性审查。
4
+ provider: chat
5
+ when:
6
+ stageMatch: "review"
7
+ ---
8
+ 你是一位首席代码审查员。
9
+ 你的任务是对以下实现/设计进行批判性审查:
10
+
11
+ {{file}}
12
+
13
+ # 指令
14
+ 1. **差距分析**: 检查是否满足所有验收标准。
15
+ 2. **代码质量**: 识别潜在的 Bug、安全问题或性能瓶颈。
16
+ 3. **设计模式**: 如果适用,建议更好的设计模式。
17
+
18
+ 输出结构化的审查意见。
@@ -0,0 +1,18 @@
1
+ ---
2
+ name: develop
3
+ description: 执行核心开发任务:编码与单元测试。
4
+ provider: chat
5
+ when:
6
+ stageMatch: "doing"
7
+ ---
8
+ 你是一位首席软件工程师。
9
+ 你的任务是实现此 Issue 中描述的功能:
10
+
11
+ {{file}}
12
+
13
+ # 指令
14
+ 1. **代码实现**: 编写满足验收标准所需的代码。
15
+ 2. **单元测试**: 添加或更新单元测试以验证你的更改。
16
+ 3. **代码规范**: 确保代码遵循项目风格指南 (Linting)。
17
+
18
+ 请提供代码变更。
@@ -0,0 +1,18 @@
1
+ ---
2
+ name: investigate
3
+ description: 扫描代码库,识别架构设计、业务约束,丰富 Issue 引用网络。
4
+ provider: chat
5
+ when:
6
+ statusMatch: "open"
7
+ ---
8
+ 你是一位资深软件架构师。
9
+ 你的任务是调查以下 Issue 的可行性和影响:
10
+
11
+ {{file}}
12
+
13
+ # 指令
14
+ 1. **架构扫描**: 识别哪些模块/文件需要修改。
15
+ 2. **依赖分析**: 建议 `parent` (父级), `dependencies` (依赖), 和 `related` (相关) 议题。
16
+ 3. **约束检查**: 识别任何业务规则或技术约束(例如,即使很小的改动也可能影响计费模块)。
17
+
18
+ 请以评论形式输出你的发现,或直接更新 Issue 描述。
@@ -0,0 +1,16 @@
1
+ ---
2
+ name: refine
3
+ description: 细化需求,将 Draft 转化为可执行的 Plan。
4
+ provider: chat
5
+ when:
6
+ stageMatch: "draft"
7
+ ---
8
+ 你是一位产品负责人 (PO) / 系统分析师。
9
+ 将以下草案 (Draft) Issue 细化为具体的技术计划:
10
+
11
+ {{file}}
12
+
13
+ # 指令
14
+ 1. **阐明目标**: 确保目标具体且标准。
15
+ 2. **扩展标准**: 添加详细的验收标准 (Acceptance Criteria)。
16
+ 3. **任务分解**: 列出具体的技术任务 (Technical Tasks)。
@@ -0,0 +1,18 @@
1
+ ---
2
+ name: verify
3
+ description: 代为检查工单完整性、代码质量与逻辑一致性。
4
+ provider: chat
5
+ when:
6
+ stageMatch: "review|done"
7
+ ---
8
+ 你是一位 QA 负责人 / 发布经理。
9
+ 你的任务是验证以下 Issue 是否已准备好交付:
10
+
11
+ {{file}}
12
+
13
+ # 指令
14
+ 1. **完成定义 (DoD)**: 检查是否所有技术任务 (Technical Tasks) 都已勾选,且满足验收标准。
15
+ 2. **质量检查**: 审查代码变更(如果在上下文中提供了),查找 Bug 或反模式。
16
+ 3. **完整性**: 确保文档和测试都已存在。
17
+
18
+ 如果准备好了,输出 "PASS";否则列出 "BLOCKERS" (阻碍项)。
@@ -2,7 +2,7 @@ import typer
2
2
  import yaml
3
3
  import json
4
4
  from pathlib import Path
5
- from typing import Optional, Any
5
+ from typing import Optional, Any, Annotated
6
6
  from rich.console import Console
7
7
  from rich.syntax import Syntax
8
8
  from pydantic import ValidationError
@@ -15,6 +15,7 @@ from monoco.core.config import (
15
15
  save_raw_config,
16
16
  get_config_path
17
17
  )
18
+ from monoco.core.output import AgentOutput, OutputManager
18
19
 
19
20
  app = typer.Typer(help="Manage Monoco configuration")
20
21
  console = Console()
@@ -38,12 +39,17 @@ def _parse_value(value: str) -> Any:
38
39
  @app.command()
39
40
  def show(
40
41
  output: str = typer.Option("yaml", "--output", "-o", help="Output format: yaml or json"),
42
+ json_output: AgentOutput = False,
41
43
  ):
42
44
  """Show the currently active (merged) configuration."""
43
45
  config = get_config()
44
46
  # Pydantic v1/v2 compat: use dict() or model_dump()
45
47
  data = config.dict()
46
48
 
49
+ if OutputManager.is_agent_mode():
50
+ OutputManager.print(data)
51
+ return
52
+
47
53
  if output == "json":
48
54
  print(json.dumps(data, indent=2))
49
55
  else:
@@ -52,7 +58,10 @@ def show(
52
58
  console.print(syntax)
53
59
 
54
60
  @app.command()
55
- def get(key: str = typer.Argument(..., help="Configuration key (e.g. project.name)")):
61
+ def get(
62
+ key: str = typer.Argument(..., help="Configuration key (e.g. project.name)"),
63
+ json_output: AgentOutput = False,
64
+ ):
56
65
  """Get a specific configuration value."""
57
66
  config = get_config()
58
67
  data = config.dict()
@@ -64,22 +73,26 @@ def get(key: str = typer.Argument(..., help="Configuration key (e.g. project.nam
64
73
  if isinstance(current, dict) and part in current:
65
74
  current = current[part]
66
75
  else:
67
- console.print(f"[red]Key '{key}' not found.[/red]")
76
+ OutputManager.error(f"Key '{key}' not found.")
68
77
  raise typer.Exit(code=1)
69
78
 
70
- if isinstance(current, (dict, list)):
71
- if isinstance(current, dict):
72
- print(yaml.dump(current, default_flow_style=False))
73
- else:
74
- print(json.dumps(current))
79
+ if OutputManager.is_agent_mode():
80
+ OutputManager.print({"key": key, "value": current})
75
81
  else:
76
- print(current)
82
+ if isinstance(current, (dict, list)):
83
+ if isinstance(current, dict):
84
+ print(yaml.dump(current, default_flow_style=False))
85
+ else:
86
+ print(json.dumps(current))
87
+ else:
88
+ print(current)
77
89
 
78
90
  @app.command(name="set")
79
91
  def set_val(
80
92
  key: str = typer.Argument(..., help="Config key (e.g. telemetry.enabled)"),
81
93
  value: str = typer.Argument(..., help="Value to set"),
82
94
  global_scope: bool = typer.Option(False, "--global", "-g", help="Update global configuration"),
95
+ json_output: AgentOutput = False,
83
96
  ):
84
97
  """Set a configuration value in specific scope (project by default)."""
85
98
  scope = ConfigScope.GLOBAL if global_scope else ConfigScope.PROJECT
@@ -98,7 +111,7 @@ def set_val(
98
111
  target = target[part]
99
112
  if not isinstance(target, dict):
100
113
  parent_key = ".".join(parts[:i+1])
101
- console.print(f"[red]Cannot set '{key}': '{parent_key}' is not a dictionary ({type(target)}).[/red]")
114
+ OutputManager.error(f"Cannot set '{key}': '{parent_key}' is not a dictionary ({type(target)}).")
102
115
  raise typer.Exit(code=1)
103
116
 
104
117
  parsed_val = _parse_value(value)
@@ -113,18 +126,26 @@ def set_val(
113
126
  # But simpler is to check if MonocoConfig accepts this structure.
114
127
  MonocoConfig(**raw_data)
115
128
  except ValidationError as e:
116
- console.print(f"[red]Validation failed for key '{key}':[/red]")
117
- console.print(e)
129
+ OutputManager.error(f"Validation failed for key '{key}':\n{e}")
118
130
  raise typer.Exit(code=1)
119
131
  except Exception as e:
120
- console.print(f"[red]Unexpected validation error: {e}[/red]")
132
+ OutputManager.error(f"Unexpected validation error: {e}")
121
133
  raise typer.Exit(code=1)
122
134
 
123
135
  # 4. Save
124
136
  save_raw_config(scope, raw_data)
125
137
 
126
138
  scope_display = "Global" if global_scope else "Project"
127
- console.print(f"[green]✓ Set {key} = {parsed_val} in {scope_display} config.[/green]")
139
+
140
+ if OutputManager.is_agent_mode():
141
+ OutputManager.print({
142
+ "status": "updated",
143
+ "scope": scope_display.lower(),
144
+ "key": key,
145
+ "value": parsed_val
146
+ })
147
+ else:
148
+ console.print(f"[green]✓ Set {key} = {parsed_val} in {scope_display} config.[/green]")
128
149
 
129
150
  if __name__ == "__main__":
130
151
  app()
@@ -4,7 +4,9 @@ from rich.console import Console
4
4
  from rich.table import Table
5
5
  from rich.panel import Panel
6
6
 
7
- from monoco.core.config import get_config
7
+ from typing import Optional, Annotated
8
+ from monoco.core.config import get_config, find_monoco_root
9
+ from monoco.core.output import AgentOutput, OutputManager
8
10
  from . import core
9
11
 
10
12
  app = typer.Typer(help="Management tools for Documentation Internationalization (i18n).")
@@ -14,6 +16,9 @@ console = Console()
14
16
  def scan(
15
17
  root: str = typer.Option(None, "--root", help="Target root directory to scan. Defaults to the project root."),
16
18
  limit: int = typer.Option(10, "--limit", help="Maximum number of missing files to display. Use 0 for unlimited."),
19
+ check_issues: bool = typer.Option(False, "--check-issues", help="Include Issues directory in the scan."),
20
+ check_source_lang: bool = typer.Option(False, "--check-source-lang", help="Verify if source files content matches source language (heuristic)."),
21
+ json: AgentOutput = False,
17
22
  ):
18
23
  """
19
24
  Scan the project for internationalization (i18n) status.
@@ -25,36 +30,92 @@ def scan(
25
30
 
26
31
  Returns a report of files missing translations in the checking target languages.
27
32
  """
28
- config = get_config()
29
- target_root = Path(root).resolve() if root else Path(config.paths.root)
33
+ if root:
34
+ target_root = Path(root).resolve()
35
+ else:
36
+ target_root = find_monoco_root(Path.cwd())
37
+
38
+ # Load config with correct root
39
+ config = get_config(project_root=str(target_root))
30
40
  target_langs = config.i18n.target_langs
41
+ source_lang = config.i18n.source_lang
31
42
 
32
- console.print(f"Scanning i18n coverage in [bold cyan]{target_root}[/bold cyan]...")
33
- console.print(f"Target Languages: [bold yellow]{', '.join(target_langs)}[/bold yellow] (Source: {config.i18n.source_lang})")
43
+ if not OutputManager.is_agent_mode():
44
+ console.print(f"Scanning i18n coverage in [bold cyan]{target_root}[/bold cyan]...")
45
+ console.print(f"Target Languages: [bold yellow]{', '.join(target_langs)}[/bold yellow] (Source: {source_lang})")
34
46
 
35
- all_files = core.discover_markdown_files(target_root)
47
+ all_files = core.discover_markdown_files(target_root, include_issues=check_issues)
36
48
 
37
49
  source_files = [f for f in all_files if not core.is_translation_file(f, target_langs)]
38
50
 
39
51
  # Store missing results: { file_path: [missing_langs] }
40
52
  missing_map = {}
53
+ # Store lang mismatch results: [file_path]
54
+ lang_mismatch_files = []
55
+
41
56
  total_checks = len(source_files) * len(target_langs)
42
57
  found_count = 0
43
58
 
44
59
  for f in source_files:
45
- missing_langs = core.check_translation_exists(f, target_root, target_langs)
60
+ # Check translation existence
61
+ missing_langs = core.check_translation_exists(f, target_root, target_langs, source_lang)
46
62
  if missing_langs:
47
63
  missing_map[f] = missing_langs
48
64
  found_count += (len(target_langs) - len(missing_langs))
49
65
  else:
50
66
  found_count += len(target_langs)
51
67
 
68
+ # Check source content language if enabled
69
+ if check_source_lang:
70
+ if not core.is_content_source_language(f, source_lang):
71
+ # Try to detect actual language for better error message
72
+ try:
73
+ content = f.read_text(encoding="utf-8")
74
+ detected = core.detect_language(content)
75
+ except:
76
+ detected = "unknown"
77
+ lang_mismatch_files.append((f, detected))
78
+
52
79
  # Reporting
53
80
  coverage = (found_count / total_checks * 100) if total_checks > 0 else 100
54
81
 
55
82
  # Sort missing_map by file path for stable output
56
83
  sorted_missing = sorted(missing_map.items(), key=lambda x: str(x[0]))
57
-
84
+
85
+ if OutputManager.is_agent_mode():
86
+ # JSON Output
87
+ report = {
88
+ "root": str(target_root),
89
+ "source_lang": source_lang,
90
+ "target_langs": target_langs,
91
+ "stats": {
92
+ "total_source_files": len(source_files),
93
+ "total_checks": total_checks,
94
+ "found_translations": found_count,
95
+ "coverage_percent": round(coverage, 2),
96
+ "missing_files_count": len(sorted_missing),
97
+ "mismatch_files_count": len(lang_mismatch_files)
98
+ },
99
+ "missing_files": [
100
+ {
101
+ "file": str(f.relative_to(target_root)),
102
+ "missing_langs": langs,
103
+ "expected_paths": [
104
+ str(core.get_target_translation_path(f, target_root, l, source_lang).relative_to(target_root))
105
+ for l in langs
106
+ ]
107
+ }
108
+ for f, langs in sorted_missing
109
+ ],
110
+ "language_mismatches": [
111
+ {"file": str(f.relative_to(target_root)), "detected": detected}
112
+ for f, detected in lang_mismatch_files
113
+ ]
114
+ }
115
+ OutputManager.print(report)
116
+ return
117
+
118
+ # Human Output
58
119
  # Apply limit
59
120
  total_missing_files = len(sorted_missing)
60
121
  display_limit = limit if limit > 0 else total_missing_files
@@ -77,7 +138,7 @@ def scan(
77
138
  rel_path = f.relative_to(target_root)
78
139
  expected_paths = []
79
140
  for lang in langs:
80
- target = core.get_target_translation_path(f, target_root, lang)
141
+ target = core.get_target_translation_path(f, target_root, lang, source_lang)
81
142
  expected_paths.append(str(target.relative_to(target_root)))
82
143
 
83
144
  table.add_row(
@@ -88,6 +149,21 @@ def scan(
88
149
 
89
150
  console.print(table)
90
151
 
152
+ # Show Language Mismatch Warnings
153
+ if lang_mismatch_files:
154
+ console.print("\n")
155
+ mismatch_table = Table(title=f"Source Language Mismatch (Expected: {source_lang})", box=None)
156
+ mismatch_table.add_column("File", style="yellow")
157
+ mismatch_table.add_column("Detected", style="red")
158
+
159
+ limit_mismatch = 10
160
+ for f, detected in lang_mismatch_files[:limit_mismatch]:
161
+ mismatch_table.add_row(str(f.relative_to(target_root)), detected)
162
+
163
+ console.print(mismatch_table)
164
+ if len(lang_mismatch_files) > limit_mismatch:
165
+ console.print(f"[dim]... and {len(lang_mismatch_files) - limit_mismatch} more.[/dim]")
166
+
91
167
  # Show hint if output was truncated
92
168
  if display_limit < total_missing_files:
93
169
  console.print(f"\n[dim]💡 Tip: Use [bold]--limit 0[/bold] to show all {total_missing_files} missing files.[/dim]\n")
@@ -111,11 +187,14 @@ def scan(
111
187
  if total_missing_files > 0:
112
188
  summary_lines.append(f" - Partial Missing: {partial_missing}")
113
189
  summary_lines.append(f" - Complete Missing: {complete_missing}")
190
+
191
+ if lang_mismatch_files:
192
+ summary_lines.append(f"Language Mismatches: {len(lang_mismatch_files)}")
114
193
 
115
194
  summary_lines.append(f"Coverage: [{status_color}]{coverage:.1f}%[/{status_color}]")
116
195
 
117
196
  summary = "\n".join(summary_lines)
118
197
  console.print(Panel(summary, title="I18N STATUS", expand=False))
119
198
 
120
- if missing_map:
199
+ if missing_map or lang_mismatch_files:
121
200
  raise typer.Exit(code=1)
@@ -1,9 +1,17 @@
1
1
  import os
2
2
  import fnmatch
3
3
  from pathlib import Path
4
- from typing import List, Set, Dict, Any
4
+ from typing import List, Set, Dict, Any, Optional
5
+ import re
5
6
 
6
- DEFAULT_EXCLUDES = [".git", ".reference", "dist", "build", "node_modules", "__pycache__", ".agent", ".mono", ".venv", "venv", "ENV", "Issues"]
7
+ DEFAULT_EXCLUDES = [
8
+ ".git", ".reference", "dist", "build", "node_modules", "__pycache__",
9
+ ".agent", ".mono", ".venv", "venv", "ENV",
10
+ # Agent Integration Directories
11
+ ".claude", ".gemini", ".qwen", ".openai", ".cursor", ".vscode", ".idea", ".fleet",
12
+ # System Prompts & Agent Configs
13
+ "AGENTS.md", "CLAUDE.md", "GEMINI.md", "QWEN.md", "SKILL.md"
14
+ ]
7
15
 
8
16
  def load_gitignore_patterns(root: Path) -> List[str]:
9
17
  """Load patterns from .gitignore file."""
@@ -25,13 +33,15 @@ def load_gitignore_patterns(root: Path) -> List[str]:
25
33
  pass
26
34
  return patterns
27
35
 
28
- def is_excluded(path: Path, root: Path, patterns: List[str]) -> bool:
36
+ def is_excluded(path: Path, root: Path, patterns: List[str], excludes: Optional[List[str]] = None) -> bool:
29
37
  """Check if a path should be excluded based on patterns and defaults."""
30
38
  rel_path = str(path.relative_to(root))
31
39
 
40
+ final_excludes = excludes if excludes is not None else DEFAULT_EXCLUDES
41
+
32
42
  # 1. Check default excludes (exact match for any path component, case-insensitive)
33
43
  for part in path.parts:
34
- if part.lower() in [e.lower() for e in DEFAULT_EXCLUDES]:
44
+ if part.lower() in [e.lower() for e in final_excludes]:
35
45
  return True
36
46
 
37
47
  # 2. Check gitignore patterns
@@ -55,15 +65,19 @@ def is_excluded(path: Path, root: Path, patterns: List[str]) -> bool:
55
65
 
56
66
  return False
57
67
 
58
- def discover_markdown_files(root: Path) -> List[Path]:
68
+ def discover_markdown_files(root: Path, include_issues: bool = False) -> List[Path]:
59
69
  """Recursively find markdown files while respecting exclusion rules."""
60
70
  patterns = load_gitignore_patterns(root)
61
71
  all_md_files = []
62
72
 
73
+ excludes = list(DEFAULT_EXCLUDES)
74
+ if not include_issues:
75
+ excludes.append("Issues")
76
+
63
77
  # We walk to ensure we can skip directories early if needed,
64
78
  # but for now rglob + filter is simpler.
65
79
  for p in root.rglob("*.md"):
66
- if p.is_file() and not is_excluded(p, root, patterns):
80
+ if p.is_file() and not is_excluded(p, root, patterns, excludes=excludes):
67
81
  all_md_files.append(p)
68
82
 
69
83
  return sorted(all_md_files)
@@ -77,6 +91,12 @@ def is_translation_file(path: Path, target_langs: List[str]) -> bool:
77
91
  for lang in normalized_langs:
78
92
  if stem_upper.endswith(f"_{lang.upper()}"):
79
93
  return True
94
+
95
+ # Generic Suffix Check: Detect any _XX suffix where XX is 2-3 letters
96
+ # This prevents files like README_ZH.md from being treated as source files
97
+ # even if 'zh' is not in target_langs (e.g. when scanning for 'en' gaps).
98
+ if re.search(r'_[A-Z]{2,3}$', stem_upper):
99
+ return True
80
100
 
81
101
  # Subdir check (case-insensitive)
82
102
  path_parts_lower = [p.lower() for p in path.parts]
@@ -86,29 +106,32 @@ def is_translation_file(path: Path, target_langs: List[str]) -> bool:
86
106
 
87
107
  return False
88
108
 
89
- def get_target_translation_path(path: Path, root: Path, lang: str) -> Path:
109
+ def get_target_translation_path(path: Path, root: Path, lang: str, source_lang: str = "en") -> Path:
90
110
  """Calculate the expected translation path for a specific language."""
91
111
  lang = lang.lower()
92
112
 
93
113
  # Parallel Directory Mode: docs/en/... -> docs/zh/...
94
- # We assume 'en' is the source language for now.
95
114
  path_parts = list(path.parts)
96
- # Search for 'en' component to replace
97
- # We iterate from root relative parts to be safe, but simple replacement of the first 'en'
98
- # component (if not part of filename) is a good heuristic for docs structure.
115
+ # Search for source_lang component to replace
99
116
  for i, part in enumerate(path_parts):
100
- if part.lower() == 'en':
117
+ if part.lower() == source_lang.lower():
101
118
  path_parts[i] = lang
102
119
  return Path(*path_parts)
103
120
 
104
- # Suffix Mode: for root files
121
+ # Suffix Mode:
122
+ # If stem ends with _{SOURCE_LANG}, strip it.
123
+ stem = path.stem
124
+ source_suffix = f"_{source_lang.upper()}"
125
+ if stem.upper().endswith(source_suffix):
126
+ stem = stem[:-len(source_suffix)]
127
+
105
128
  if path.parent == root:
106
- return path.with_name(f"{path.stem}_{lang.upper()}{path.suffix}")
129
+ return path.with_name(f"{stem}_{lang.upper()}{path.suffix}")
107
130
 
108
131
  # Subdir Mode: for documentation directories (fallback)
109
132
  return path.parent / lang / path.name
110
133
 
111
- def check_translation_exists(path: Path, root: Path, target_langs: List[str]) -> List[str]:
134
+ def check_translation_exists(path: Path, root: Path, target_langs: List[str], source_lang: str = "en") -> List[str]:
112
135
  """
113
136
  Verify which target languages have translations.
114
137
  Returns a list of missing language codes.
@@ -116,12 +139,85 @@ def check_translation_exists(path: Path, root: Path, target_langs: List[str]) ->
116
139
  if is_translation_file(path, target_langs):
117
140
  return [] # Already a translation, skip
118
141
 
142
+ # Special handling for standard files: always treat as EN source
143
+ effective_source_lang = source_lang
144
+ if path.name.upper() in ["README.MD", "CHANGELOG.MD", "CODE_OF_CONDUCT.MD", "CONTRIBUTING.MD", "LICENSE.MD", "SECURITY.MD"]:
145
+ effective_source_lang = "en"
146
+
119
147
  missing = []
120
148
  for lang in target_langs:
121
- target = get_target_translation_path(path, root, lang)
149
+ # Skip if target language matches the effective source language
150
+ if lang.lower() == effective_source_lang.lower():
151
+ continue
152
+
153
+ target = get_target_translation_path(path, root, lang, effective_source_lang)
122
154
  if not target.exists():
123
155
  missing.append(lang)
124
156
  return missing
157
+
158
+ def detect_language(content: str) -> str:
159
+ """
160
+ Detect the language of the content using simple heuristics.
161
+ Returns: 'zh', 'en', or 'unknown'
162
+ """
163
+ if not content:
164
+ return 'unknown'
165
+
166
+ # Strip YAML Frontmatter if present
167
+ # Matches --- at start, followed by anything, followed by ---
168
+ frontmatter_pattern = re.compile(r'^---\n.*?\n---\n', re.DOTALL)
169
+ content = frontmatter_pattern.sub('', content)
170
+
171
+ if not content.strip():
172
+ return 'unknown'
173
+
174
+ # 1. Check for CJK characters (Chinese/Japanese/Korean)
175
+ # Range: \u4e00-\u9fff (Common CJK Unified Ideographs)
176
+ # Heuristic: If CJK count > threshold, it's likely Asian (we assume ZH for now in this context)
177
+ total_chars = len(content)
178
+ cjk_count = sum(1 for c in content if '\u4e00' <= c <= '\u9fff')
179
+
180
+ # If > 5% chars are CJK, highly likely to be Chinese document
181
+ if total_chars > 0 and cjk_count / total_chars > 0.05:
182
+ return 'zh'
183
+
184
+ # 2. Check for English
185
+ # Heuristic: High ASCII ratio and low CJK
186
+ non_ascii = sum(1 for c in content if ord(c) > 127)
187
+
188
+ # If < 10% non-ASCII, likely English (or code)
189
+ if total_chars > 0 and non_ascii / total_chars < 0.1:
190
+ return 'en'
191
+
192
+ return 'unknown'
193
+
194
+ def is_content_source_language(path: Path, source_lang: str = "en") -> bool:
195
+ """
196
+ Check if file content appears to be in the source language.
197
+ """
198
+ try:
199
+ # Special handling for README/CHANGELOG
200
+ if path.name.upper() in ["README.MD", "CHANGELOG.MD"]:
201
+ source_lang = "en"
202
+
203
+ content = path.read_text(encoding="utf-8")
204
+ detected = detect_language(content)
205
+
206
+ # 'unknown' is leniently accepted as valid to avoid false positives on code-heavy files
207
+ if detected == 'unknown':
208
+ return True
209
+
210
+ # Normalize source_lang
211
+ expected = source_lang.lower()
212
+ if expected == 'zh' or expected == 'cn':
213
+ return detected == 'zh'
214
+ elif expected == 'en':
215
+ return detected == 'en'
216
+
217
+ # For other languages, we don't have detectors yet
218
+ return True
219
+ except Exception:
220
+ return True # Assume valid on error
125
221
  # ... (Existing code) ...
126
222
 
127
223
  SKILL_CONTENT = """---