oh-my-codex-cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent/skills/agent-kb/HOW_TO_USE.md +428 -0
- package/.agent/skills/agent-kb/README.md +46 -0
- package/.agent/skills/agent-kb/SKILL.md +128 -0
- package/.agent/skills/agent-kb/references/intelligent-analysis-explained.md +333 -0
- package/.agent/skills/agent-kb/references/query-optimization.md +225 -0
- package/.agent/skills/aireview/SKILL.md +704 -0
- package/.agent/skills/analyze/SKILL.md +81 -0
- package/.agent/skills/architect-planner/HOW_TO_USE.md +238 -0
- package/.agent/skills/architect-planner/README.md +41 -0
- package/.agent/skills/architect-planner/SKILL.md +539 -0
- package/.agent/skills/auto-mbti/SKILL.md +291 -0
- package/.agent/skills/autopilot/SKILL.md +222 -0
- package/.agent/skills/backend-patterns/SKILL.md +602 -0
- package/.agent/skills/bdd-generator/README.md +78 -0
- package/.agent/skills/bdd-generator/SKILL.md +436 -0
- package/.agent/skills/brainstorming/HOW_TO_USE.md +289 -0
- package/.agent/skills/brainstorming/README.md +41 -0
- package/.agent/skills/brainstorming/SKILL.md +165 -0
- package/.agent/skills/build-fix/SKILL.md +190 -0
- package/.agent/skills/cancel/SKILL.md +658 -0
- package/.agent/skills/checkpoint/SKILL.md +94 -0
- package/.agent/skills/code-review/SKILL.md +273 -0
- package/.agent/skills/coding-standards/SKILL.md +535 -0
- package/.agent/skills/conductor/SKILL.md +128 -0
- package/.agent/skills/conductor/commands/conductor/implement.toml +358 -0
- package/.agent/skills/conductor/commands/conductor/newTrack.toml +142 -0
- package/.agent/skills/conductor/commands/conductor/revert.toml +123 -0
- package/.agent/skills/conductor/commands/conductor/setup.toml +429 -0
- package/.agent/skills/conductor/commands/conductor/status.toml +57 -0
- package/.agent/skills/conductor/scripts/install.sh +89 -0
- package/.agent/skills/conductor/templates/code_styleguides/csharp.md +115 -0
- package/.agent/skills/conductor/templates/code_styleguides/dart.md +238 -0
- package/.agent/skills/conductor/templates/code_styleguides/general.md +23 -0
- package/.agent/skills/conductor/templates/code_styleguides/go.md +48 -0
- package/.agent/skills/conductor/templates/code_styleguides/html-css.md +49 -0
- package/.agent/skills/conductor/templates/code_styleguides/javascript.md +51 -0
- package/.agent/skills/conductor/templates/code_styleguides/python.md +37 -0
- package/.agent/skills/conductor/templates/code_styleguides/typescript.md +43 -0
- package/.agent/skills/conductor/templates/rules/README.md +23 -0
- package/.agent/skills/conductor/templates/rules/agents.md +49 -0
- package/.agent/skills/conductor/templates/rules/coding-style.md +70 -0
- package/.agent/skills/conductor/templates/rules/dev.md +20 -0
- package/.agent/skills/conductor/templates/rules/git-workflow.md +45 -0
- package/.agent/skills/conductor/templates/rules/hooks.md +6 -0
- package/.agent/skills/conductor/templates/rules/patterns.md +55 -0
- package/.agent/skills/conductor/templates/rules/performance.md +47 -0
- package/.agent/skills/conductor/templates/rules/research.md +26 -0
- package/.agent/skills/conductor/templates/rules/review.md +22 -0
- package/.agent/skills/conductor/templates/rules/security.md +36 -0
- package/.agent/skills/conductor/templates/rules/testing.md +30 -0
- package/.agent/skills/conductor/templates/workflow.md +333 -0
- package/.agent/skills/consensus/HOW_TO_USE.md +191 -0
- package/.agent/skills/consensus/README.md +41 -0
- package/.agent/skills/consensus/SKILL.md +317 -0
- package/.agent/skills/content-research-writer/SKILL.md +537 -0
- package/.agent/skills/debug-analysis/SKILL.md +331 -0
- package/.agent/skills/deepinit/SKILL.md +347 -0
- package/.agent/skills/deepsearch/SKILL.md +56 -0
- package/.agent/skills/doctor/SKILL.md +158 -0
- package/.agent/skills/drawio/EXAMPLES.md +382 -0
- package/.agent/skills/drawio/QUICK_START.md +237 -0
- package/.agent/skills/drawio/README.md +315 -0
- package/.agent/skills/drawio/SETUP_GUIDE.md +254 -0
- package/.agent/skills/drawio/SKILL.md +1176 -0
- package/.agent/skills/e2e/SKILL.md +396 -0
- package/.agent/skills/ecomode/SKILL.md +160 -0
- package/.agent/skills/electron-driver/SKILL.md +144 -0
- package/.agent/skills/electron-driver/scripts/driver-template.js +71 -0
- package/.agent/skills/eval/SKILL.md +140 -0
- package/.agent/skills/eval-harness/SKILL.md +242 -0
- package/.agent/skills/evolve/SKILL.md +213 -0
- package/.agent/skills/frontend-design/SKILL.md +42 -0
- package/.agent/skills/frontend-patterns/SKILL.md +646 -0
- package/.agent/skills/frontend-ui-ux/SKILL.md +70 -0
- package/.agent/skills/git-master/SKILL.md +75 -0
- package/.agent/skills/help/SKILL.md +89 -0
- package/.agent/skills/iterative-retrieval/SKILL.md +217 -0
- package/.agent/skills/local-skills-setup/SKILL.md +483 -0
- package/.agent/skills/log-analyzer/SKILL.md +187 -0
- package/.agent/skills/mcp-setup/SKILL.md +226 -0
- package/.agent/skills/multi-model-research/HOW_TO_USE.md +614 -0
- package/.agent/skills/multi-model-research/README.md +233 -0
- package/.agent/skills/multi-model-research/SKILL.md +541 -0
- package/.agent/skills/multi-model-research/references/troubleshooting.md +415 -0
- package/.agent/skills/note/SKILL.md +80 -0
- package/.agent/skills/omc-setup/SKILL.md +219 -0
- package/.agent/skills/orchestrate/SKILL.md +620 -0
- package/.agent/skills/patent-workflow/IMPLEMENTATION_SUMMARY.md +500 -0
- package/.agent/skills/patent-workflow/README.md +455 -0
- package/.agent/skills/patent-workflow/SKILL.md +1036 -0
- package/.agent/skills/patent-workflow/tools/irr_checker.py +260 -0
- package/.agent/skills/patent-workflow/tools/sample_terminology.json +49 -0
- package/.agent/skills/patent-workflow/tools/term_checker.py +355 -0
- package/.agent/skills/pattern-recognition/SKILL.md +792 -0
- package/.agent/skills/pipeline/SKILL.md +448 -0
- package/.agent/skills/plan/SKILL.md +309 -0
- package/.agent/skills/planning-methodology/SKILL.md +370 -0
- package/.agent/skills/planning-with-files/SKILL.md +210 -0
- package/.agent/skills/planning-with-files/examples.md +202 -0
- package/.agent/skills/planning-with-files/reference.md +218 -0
- package/.agent/skills/planning-with-files/scripts/check-complete.ps1 +42 -0
- package/.agent/skills/planning-with-files/scripts/check-complete.sh +44 -0
- package/.agent/skills/planning-with-files/scripts/init-session.ps1 +120 -0
- package/.agent/skills/planning-with-files/scripts/init-session.sh +120 -0
- package/.agent/skills/planning-with-files/scripts/session-catchup.py +208 -0
- package/.agent/skills/planning-with-files/templates/findings.md +95 -0
- package/.agent/skills/planning-with-files/templates/progress.md +114 -0
- package/.agent/skills/planning-with-files/templates/task_plan.md +132 -0
- package/.agent/skills/project-analyze/CLAUDE.md +18 -0
- package/.agent/skills/project-analyze/HOW_TO_USE.md +145 -0
- package/.agent/skills/project-analyze/README.md +42 -0
- package/.agent/skills/project-analyze/SKILL.md +289 -0
- package/.agent/skills/project-analyze/SKILL.md.backup +287 -0
- package/.agent/skills/project-analyze/SKILL.md.backup_20260105_093646 +287 -0
- package/.agent/skills/project-analyze/assets/analysis-report-template.md +433 -0
- package/.agent/skills/project-analyze/references/analysis-patterns.md +422 -0
- package/.agent/skills/project-analyze/references/projectmind-explained.md +535 -0
- package/.agent/skills/project-session-manager/SKILL.md +428 -0
- package/.agent/skills/project-session-manager/lib/config.sh +86 -0
- package/.agent/skills/project-session-manager/lib/parse.sh +121 -0
- package/.agent/skills/project-session-manager/lib/session.sh +132 -0
- package/.agent/skills/project-session-manager/lib/tmux.sh +103 -0
- package/.agent/skills/project-session-manager/lib/worktree.sh +171 -0
- package/.agent/skills/project-session-manager/psm.sh +629 -0
- package/.agent/skills/project-session-manager/templates/feature.md +56 -0
- package/.agent/skills/project-session-manager/templates/issue-fix.md +57 -0
- package/.agent/skills/project-session-manager/templates/pr-review.md +65 -0
- package/.agent/skills/project-session-manager/templates/projects.json +19 -0
- package/.agent/skills/quality-check/HOW_TO_USE.md +171 -0
- package/.agent/skills/quality-check/README.md +50 -0
- package/.agent/skills/quality-check/SKILL.md +240 -0
- package/.agent/skills/quality-check/SKILL.md.backup +238 -0
- package/.agent/skills/quality-check/SKILL.md.backup_20260105_093646 +238 -0
- package/.agent/skills/quality-check/assets/quality-report-template.md +437 -0
- package/.agent/skills/quality-check/references/refactoring-patterns.md +550 -0
- package/.agent/skills/quality-check/references/scoring-criteria.md +454 -0
- package/.agent/skills/quality-validation/SKILL.md +519 -0
- package/.agent/skills/quality-validation/SKILL.md.backup +573 -0
- package/.agent/skills/quality-validation/SKILL.md.backup_20260105_093646 +573 -0
- package/.agent/skills/ralph/SKILL.md +236 -0
- package/.agent/skills/ralph-init/SKILL.md +78 -0
- package/.agent/skills/ralplan/SKILL.md +58 -0
- package/.agent/skills/refactor-clean/SKILL.md +49 -0
- package/.agent/skills/release/SKILL.md +84 -0
- package/.agent/skills/research/SKILL.md +526 -0
- package/.agent/skills/research-methodology/SKILL.md +268 -0
- package/.agent/skills/review/SKILL.md +53 -0
- package/.agent/skills/security-review/SKILL.md +509 -0
- package/.agent/skills/security-review/cloud-infrastructure-security.md +361 -0
- package/.agent/skills/setup-pm/SKILL.md +102 -0
- package/.agent/skills/skill/SKILL.md +424 -0
- package/.agent/skills/skill-create/SKILL.md +209 -0
- package/.agent/skills/skill-debugger/HOW_TO_USE.md +244 -0
- package/.agent/skills/skill-debugger/README.md +44 -0
- package/.agent/skills/skill-debugger/SKILL.md +326 -0
- package/.agent/skills/skill-debugger/diagnostic_checklist.md +115 -0
- package/.agent/skills/skill-development/SKILL.md +661 -0
- package/.agent/skills/skill-development/references/skill-creator-original.md +209 -0
- package/.agent/skills/skill-doc-generator/README.md +37 -0
- package/.agent/skills/skill-doc-generator/SKILL.md +331 -0
- package/.agent/skills/skill-quality-analyzer/HOW_TO_USE.md +243 -0
- package/.agent/skills/skill-quality-analyzer/README.md +61 -0
- package/.agent/skills/skill-quality-analyzer/SKILL.md +247 -0
- package/.agent/skills/skill-quality-analyzer/analyzer.py +209 -0
- package/.agent/skills/skill-quality-analyzer/expected_output.json +81 -0
- package/.agent/skills/skill-quality-analyzer/sample_input.json +9 -0
- package/.agent/skills/skill-tester/README.md +46 -0
- package/.agent/skills/skill-tester/SKILL.md +345 -0
- package/.agent/skills/start-dev/SKILL.md +701 -0
- package/.agent/skills/swarm/SKILL.md +691 -0
- package/.agent/skills/task-kb-lookup/SKILL.md +211 -0
- package/.agent/skills/task-kb-record/SKILL.md +417 -0
- package/.agent/skills/tdd/SKILL.md +446 -0
- package/.agent/skills/tdd-generator/DEMO.md +516 -0
- package/.agent/skills/tdd-generator/README.md +89 -0
- package/.agent/skills/tdd-generator/SKILL.md +278 -0
- package/.agent/skills/tdd-workflow/SKILL.md +424 -0
- package/.agent/skills/test-coverage/SKILL.md +48 -0
- package/.agent/skills/thinkdeep/HOW_TO_USE.md +183 -0
- package/.agent/skills/thinkdeep/README.md +41 -0
- package/.agent/skills/thinkdeep/SKILL.md +343 -0
- package/.agent/skills/ui-ux-pro-max/SKILL.md +228 -0
- package/.agent/skills/ui-ux-pro-max/data/charts.csv +26 -0
- package/.agent/skills/ui-ux-pro-max/data/colors.csv +97 -0
- package/.agent/skills/ui-ux-pro-max/data/landing.csv +31 -0
- package/.agent/skills/ui-ux-pro-max/data/products.csv +97 -0
- package/.agent/skills/ui-ux-pro-max/data/prompts.csv +24 -0
- package/.agent/skills/ui-ux-pro-max/data/stacks/flutter.csv +53 -0
- package/.agent/skills/ui-ux-pro-max/data/stacks/html-tailwind.csv +56 -0
- package/.agent/skills/ui-ux-pro-max/data/stacks/nextjs.csv +53 -0
- package/.agent/skills/ui-ux-pro-max/data/stacks/react-native.csv +52 -0
- package/.agent/skills/ui-ux-pro-max/data/stacks/react.csv +54 -0
- package/.agent/skills/ui-ux-pro-max/data/stacks/svelte.csv +54 -0
- package/.agent/skills/ui-ux-pro-max/data/stacks/swiftui.csv +51 -0
- package/.agent/skills/ui-ux-pro-max/data/stacks/vue.csv +50 -0
- package/.agent/skills/ui-ux-pro-max/data/styles.csv +59 -0
- package/.agent/skills/ui-ux-pro-max/data/typography.csv +58 -0
- package/.agent/skills/ui-ux-pro-max/data/ux-guidelines.csv +100 -0
- package/.agent/skills/ui-ux-pro-max/scripts/core.py +236 -0
- package/.agent/skills/ui-ux-pro-max/scripts/search.py +61 -0
- package/.agent/skills/ultrapilot/SKILL.md +647 -0
- package/.agent/skills/ultraqa/SKILL.md +152 -0
- package/.agent/skills/ultrawork/SKILL.md +123 -0
- package/.agent/skills/update-codemaps/SKILL.md +38 -0
- package/.agent/skills/update-docs/SKILL.md +52 -0
- package/.agent/skills/verification-loop/SKILL.md +140 -0
- package/.agent/skills/verify/SKILL.md +80 -0
- package/.agent/skills/writer-memory/SKILL.md +459 -0
- package/.agent/skills/writer-memory/lib/character-tracker.ts +338 -0
- package/.agent/skills/writer-memory/lib/memory-manager.ts +804 -0
- package/.agent/skills/writer-memory/lib/relationship-graph.ts +400 -0
- package/.agent/skills/writer-memory/lib/scene-organizer.ts +544 -0
- package/.agent/skills/writer-memory/lib/synopsis-builder.ts +339 -0
- package/.agent/skills/writer-memory/templates/synopsis-template.md +46 -0
- package/.governance/skill-lint.allowlist +4 -0
- package/.governance/skill-llm.allowlist +4 -0
- package/AGENTS.md +59 -0
- package/LICENSE +21 -0
- package/README.md +169 -0
- package/README.zh.md +145 -0
- package/bin/omcodex.js +8 -0
- package/commands/conductor/implement.toml +358 -0
- package/commands/conductor/newTrack.toml +142 -0
- package/commands/conductor/revert.toml +123 -0
- package/commands/conductor/setup.toml +429 -0
- package/commands/conductor/status.toml +57 -0
- package/docs/ALIGNMENT.md +40 -0
- package/docs/CODEX.md +133 -0
- package/docs/NOTIFY.md +81 -0
- package/docs/SKILL_GOVERNANCE.md +72 -0
- package/docs/SKILL_GOVERNANCE_FRAMEWORK.md +182 -0
- package/docs/SKILL_GOVERNANCE_FRAMEWORK.zh.md +170 -0
- package/package.json +50 -0
- package/prompts/architect.md +105 -0
- package/prompts/executor.md +134 -0
- package/prompts/planner.md +113 -0
- package/scripts/check-skill-governance.sh +84 -0
- package/scripts/check-skill-llm-governance.js +302 -0
- package/scripts/eval-skills.js +217 -0
- package/scripts/generate-catalog-docs.js +95 -0
- package/scripts/generate-codex-mcp-config.sh +22 -0
- package/scripts/install-codex-force.sh +5 -0
- package/scripts/install-codex-incremental.sh +5 -0
- package/scripts/install-codex.sh +79 -0
- package/scripts/notify-dispatch.js +15 -0
- package/scripts/setup-package-manager.js +137 -0
- package/src/catalog/generated/public-catalog.json +547 -0
- package/src/catalog/manifest.json +542 -0
- package/src/catalog/reader.js +43 -0
- package/src/catalog/schema.js +79 -0
- package/src/cli/doctor.js +62 -0
- package/src/cli/index.js +85 -0
- package/src/cli/notify.js +127 -0
- package/src/cli/route.js +43 -0
- package/src/cli/setup.js +155 -0
- package/src/cli/team.js +125 -0
- package/src/config/generator.js +119 -0
- package/src/mcp/memory-server.js +241 -0
- package/src/mcp/state-server.js +112 -0
- package/src/mcp/trace-server.js +168 -0
- package/src/notify/dispatch.js +74 -0
- package/src/notify/extensibility/dispatcher.js +113 -0
- package/src/notify/extensibility/events.js +15 -0
- package/src/notify/extensibility/loader.js +54 -0
- package/src/router/skill-router.js +90 -0
- package/src/team/auto-advance.js +72 -0
- package/src/team/orchestrator.js +82 -0
- package/src/team/state-store.js +33 -0
- package/src/utils/paths.js +33 -0
- package/templates/AGENTS.md +15 -0
- package/templates/catalog-manifest.json +542 -0
- package/templates/code_styleguides/csharp.md +115 -0
- package/templates/code_styleguides/dart.md +238 -0
- package/templates/code_styleguides/general.md +23 -0
- package/templates/code_styleguides/go.md +48 -0
- package/templates/code_styleguides/html-css.md +49 -0
- package/templates/code_styleguides/javascript.md +51 -0
- package/templates/code_styleguides/python.md +37 -0
- package/templates/code_styleguides/typescript.md +43 -0
- package/templates/rules/README.md +23 -0
- package/templates/rules/agents.md +49 -0
- package/templates/rules/coding-style.md +70 -0
- package/templates/rules/dev.md +20 -0
- package/templates/rules/git-workflow.md +45 -0
- package/templates/rules/notify.md +6 -0
- package/templates/rules/patterns.md +55 -0
- package/templates/rules/performance.md +47 -0
- package/templates/rules/research.md +26 -0
- package/templates/rules/review.md +22 -0
- package/templates/rules/security.md +36 -0
- package/templates/rules/testing.md +30 -0
- package/templates/workflow.md +333 -0
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
IRR (Inverse Repetition Rate) Checker for Patent Documents
|
|
4
|
+
|
|
5
|
+
借鉴AutoPatent的IRR指标,用于检测专利文档中的句子重复率。
|
|
6
|
+
|
|
7
|
+
IRR = 1 - (重复句子数 / 总句子数)
|
|
8
|
+
目标: IRR ≥ 0.85 (重复率 ≤ 15%)
|
|
9
|
+
|
|
10
|
+
Usage:
|
|
11
|
+
python irr_checker.py <patent_document_path>
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import sys
|
|
15
|
+
import re
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import List, Tuple, Dict
|
|
18
|
+
from collections import Counter
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class IRRChecker:
    """Sentence-level repetition checker based on the IRR metric.

    IRR (Inverse Repetition Rate) is the share of distinct sentences among
    all sentences of a document (``unique / total``), so the repetition rate
    is ``1 - IRR``. A document passes when its IRR is at least ``threshold``.
    """

    # Sentence terminators: the Chinese full stop, ASCII and full-width
    # exclamation mark / question mark / semicolon, and line breaks.
    _SENTENCE_BOUNDARY = re.compile(r'[。!?;\uff01\uff1f\uff1b\n]')
    # Everything except CJK ideographs, ASCII letters and digits. Whitespace
    # is outside the kept set, so no separate whitespace pass is needed.
    _NON_CONTENT = re.compile(r'[^\u4e00-\u9fa5a-zA-Z0-9]')
    # Fragments of this many characters or fewer are not treated as sentences.
    _MIN_SENTENCE_CHARS = 5
    # Longest sentence excerpt stored in the statistics.
    _PREVIEW_CHARS = 100
    # Number of repeated sentences listed in the text report.
    _MAX_LISTED = 10

    def __init__(self, threshold: float = 0.85):
        """Create a checker.

        Args:
            threshold: Minimum IRR required to pass; the default 0.85
                corresponds to a repetition rate of at most 15%.
        """
        self.threshold = threshold

    def extract_sentences(self, text: str) -> List[str]:
        """Split ``text`` into stripped sentences.

        Args:
            text: Input text.

        Returns:
            The fragments between sentence terminators, stripped of
            surrounding whitespace, keeping only those longer than five
            characters.
        """
        fragments = (piece.strip() for piece in self._SENTENCE_BOUNDARY.split(text))
        return [piece for piece in fragments if len(piece) > self._MIN_SENTENCE_CHARS]

    def normalize_sentence(self, sentence: str) -> str:
        """Reduce a sentence to its comparable content.

        Args:
            sentence: Original sentence.

        Returns:
            The sentence with whitespace, punctuation and every other
            character that is not a CJK ideograph, ASCII letter or digit
            removed.
        """
        return self._NON_CONTENT.sub('', sentence)

    @classmethod
    def _preview(cls, sentence: str) -> str:
        """Return ``sentence`` shortened to the preview length, with an ellipsis."""
        if len(sentence) > cls._PREVIEW_CHARS:
            return sentence[:cls._PREVIEW_CHARS] + '...'
        return sentence

    def calculate_irr(self, text: str) -> Tuple[float, Dict]:
        """Compute the IRR of ``text``.

        Sentences are compared after normalization. Fragments whose
        normalized form is empty (punctuation-only lines such as separator
        rules) are ignored instead of being counted as duplicates of each
        other.

        Args:
            text: Patent document text.

        Returns:
            ``(irr, stats)`` where ``stats`` holds ``total_sentences``,
            ``unique_sentences``, ``repetition_rate`` and
            ``repeated_sentences`` (one ``{'sentence', 'count'}`` entry per
            repeated sentence, in order of first appearance). Text without
            any sentence yields an IRR of 1.0.
        """
        counts = Counter()
        first_original: Dict[str, str] = {}
        for sentence in self.extract_sentences(text):
            key = self.normalize_sentence(sentence)
            if not key:
                continue
            counts[key] += 1
            # Remember the first raw form once, so repeated sentences can be
            # reported without re-normalizing the whole document per key.
            first_original.setdefault(key, sentence)

        total_sentences = sum(counts.values())
        if total_sentences == 0:
            return 1.0, {
                'total_sentences': 0,
                'unique_sentences': 0,
                'repetition_rate': 0.0,
                'repeated_sentences': []
            }

        unique_sentences = len(counts)
        irr = unique_sentences / total_sentences

        repeated_sentences = [
            {'sentence': self._preview(first_original[key]), 'count': count}
            for key, count in counts.items()
            if count > 1
        ]

        return irr, {
            'total_sentences': total_sentences,
            'unique_sentences': unique_sentences,
            'repetition_rate': 1 - irr,
            'repeated_sentences': repeated_sentences
        }

    def check_document(self, file_path: str) -> Dict:
        """Read a UTF-8 document and evaluate its IRR.

        Args:
            file_path: Path of the document.

        Returns:
            ``{'passed', 'irr', 'threshold', 'stats'}`` on success, or
            ``{'error': message}`` when the file is missing or cannot be
            processed. Errors are reported in the result, not raised.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                text = f.read()

            irr, stats = self.calculate_irr(text)

            return {
                'passed': irr >= self.threshold,
                'irr': irr,
                'threshold': self.threshold,
                'stats': stats
            }

        except FileNotFoundError:
            return {
                'error': f'文件不存在: {file_path}'
            }
        except Exception as e:
            # Deliberate catch-all: callers expect an error entry in the
            # result rather than an exception.
            return {
                'error': f'处理文件时出错: {str(e)}'
            }

    def format_report(self, result: Dict) -> str:
        """Render a check result as a plain-text report.

        Args:
            result: A dictionary as returned by ``check_document``.

        Returns:
            The report as one newline-joined string; a single error line
            when ``result`` contains an ``'error'`` key.
        """
        if 'error' in result:
            return f"❌ Error: {result['error']}"

        irr = result['irr']
        threshold = result['threshold']
        passed = result['passed']
        stats = result['stats']

        status = "✅ Pass" if passed else "❌ Fail"

        report = [
            "=" * 60,
            "IRR (Inverse Repetition Rate) Check Report",
            "=" * 60,
            "",
            f"IRR Score: {irr:.4f} ({status} - 目标≥{threshold:.2f})",
            f"Unique Sentences: {stats['unique_sentences']} / {stats['total_sentences']}",
            f"Repetition Rate: {stats['repetition_rate'] * 100:.2f}%",
            "",
        ]

        repeated = stats['repeated_sentences']
        if repeated:
            report.append("Repetitive Sentences Found:")
            report.append("-" * 60)
            # Only the first few entries are listed; the rest are summarized.
            for i, item in enumerate(repeated[:self._MAX_LISTED], 1):
                report.append(f"{i}. (重复{item['count']}次)")
                report.append(f" {item['sentence']}")
                report.append("")

            if len(repeated) > self._MAX_LISTED:
                report.append(f"... and {len(repeated) - self._MAX_LISTED} more repeated sentences")
                report.append("")

        report.append("Recommendations:")
        report.append("-" * 60)
        if passed:
            report.append("✅ IRR指标达标,文档重复率控制良好。")
        else:
            report.append("⚠️ IRR指标未达标,建议:")
            report.append(" 1. 变换表述方式(同一技术特征用不同角度描述)")
            report.append(" 2. 增加技术细节(不同实施例补充不同参数)")
            report.append(" 3. 避免模板化表述(减少套话)")

        report.append("")
        report.append("=" * 60)

        return '\n'.join(report)
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def main():
    """Command-line entry point: check one document and exit with a status.

    Exit status is 0 when the document passes, 1 when it fails the check or
    no path argument was supplied, and 2 when the result carries an error.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        usage_lines = (
            "Usage: python irr_checker.py <patent_document_path>",
            "",
            "Example:",
            " python irr_checker.py /path/to/patent.txt",
        )
        for usage_line in usage_lines:
            print(usage_line)
        sys.exit(1)

    file_path = cli_args[0]
    checker = IRRChecker(threshold=0.85)

    print(f"Checking document: {file_path}")
    print("")

    result = checker.check_document(file_path)
    print(checker.format_report(result))

    # Map the outcome onto the process exit status.
    if 'error' in result:
        exit_status = 2
    elif result['passed']:
        exit_status = 0
    else:
        exit_status = 1
    sys.exit(exit_status)
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
{
|
|
2
|
+
"standard_terms": {
|
|
3
|
+
"联邦学习": ["标准术语", "分布式机器学习方法"],
|
|
4
|
+
"用户画像数据": ["标准术语", "用户特征数据"],
|
|
5
|
+
"第三方鉴权节点": ["标准术语", "可信第三方"],
|
|
6
|
+
"差分隐私": ["标准术语", "隐私保护技术"],
|
|
7
|
+
"模型训练": ["标准术语"],
|
|
8
|
+
"数据加密": ["标准术语"],
|
|
9
|
+
"梯度聚合": ["标准术语", "联邦学习核心步骤"],
|
|
10
|
+
"本地训练": ["标准术语"],
|
|
11
|
+
"全局模型": ["标准术语"],
|
|
12
|
+
"隐私预算": ["标准术语", "差分隐私参数"],
|
|
13
|
+
"权利要求": ["专利术语"],
|
|
14
|
+
"技术方案": ["专利术语"],
|
|
15
|
+
"有益效果": ["专利术语"],
|
|
16
|
+
"具体实施方式": ["专利术语"],
|
|
17
|
+
"背景技术": ["专利术语"]
|
|
18
|
+
},
|
|
19
|
+
"synonym_groups": [
|
|
20
|
+
["联邦学习", "分布式学习", "联合学习", "协作学习"],
|
|
21
|
+
["用户画像数据", "用户数据", "用户特征", "用户信息"],
|
|
22
|
+
["第三方鉴权节点", "可信第三方", "中间方", "第三方服务器"],
|
|
23
|
+
["差分隐私", "隐私保护", "隐私计算"],
|
|
24
|
+
["模型训练", "训练模型", "模型学习"],
|
|
25
|
+
["数据加密", "加密数据", "加密传输"],
|
|
26
|
+
["梯度聚合", "梯度汇总", "梯度融合"],
|
|
27
|
+
["本地训练", "本地模型训练", "局部训练"],
|
|
28
|
+
["全局模型", "全局模型参数", "中心模型"],
|
|
29
|
+
["隐私预算", "隐私参数", "ε-差分隐私参数"]
|
|
30
|
+
],
|
|
31
|
+
"forbidden_terms": [
|
|
32
|
+
"等",
|
|
33
|
+
"或类似",
|
|
34
|
+
"大约",
|
|
35
|
+
"约",
|
|
36
|
+
"优化",
|
|
37
|
+
"改进",
|
|
38
|
+
"优秀",
|
|
39
|
+
"高效",
|
|
40
|
+
"先进"
|
|
41
|
+
],
|
|
42
|
+
"notes": [
|
|
43
|
+
"本术语库为联邦学习领域专利示例",
|
|
44
|
+
"请根据实际技术领域创建专用术语库",
|
|
45
|
+
"standard_terms: 推荐使用的标准术语",
|
|
46
|
+
"synonym_groups: 同义词组(第一个为标准术语)",
|
|
47
|
+
"forbidden_terms: 应避免使用的模糊词汇"
|
|
48
|
+
]
|
|
49
|
+
}
|
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Terminology Consistency Checker for Patent Documents
|
|
4
|
+
|
|
5
|
+
用于检查专利文档中的术语一致性,确保全文使用统一的技术术语。
|
|
6
|
+
|
|
7
|
+
目标: 术语一致性 ≥ 90%
|
|
8
|
+
|
|
9
|
+
Usage:
|
|
10
|
+
python term_checker.py <patent_document_path> <terminology_database_path>
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import sys
|
|
14
|
+
import re
|
|
15
|
+
import json
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import List, Dict, Tuple, Set
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class TermChecker:
|
|
21
|
+
"""术语一致性检查器"""
|
|
22
|
+
|
|
23
|
+
def __init__(self, threshold: float = 0.90):
|
|
24
|
+
"""
|
|
25
|
+
初始化术语检查器
|
|
26
|
+
|
|
27
|
+
Args:
|
|
28
|
+
threshold: 一致性阈值,默认0.90(90%)
|
|
29
|
+
"""
|
|
30
|
+
self.threshold = threshold
|
|
31
|
+
self.term_database = {} # 标准术语库
|
|
32
|
+
self.synonym_groups = [] # 同义词组
|
|
33
|
+
|
|
34
|
+
def load_terminology(self, file_path: str):
    """Load the terminology database from a UTF-8 JSON file.

    Expected layout::

        {
          "standard_terms": {"<term>": ["<note>", ...], ...},
          "synonym_groups": [["<standard term>", "<synonym>", ...], ...]
        }

    Both keys are optional. A missing file is not fatal: a warning is
    printed and the checker is reset to an empty database, so a previously
    loaded database cannot leak into the next check. A file that is not
    valid JSON prints an error and terminates the process with exit
    status 2.

    Args:
        file_path: Path of the terminology JSON file.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        # Make the state match the "empty terminology database" message.
        self.term_database = {}
        self.synonym_groups = []
        print(f"⚠️ 术语库文件不存在: {file_path}")
        print("将使用空术语库进行检查。")
        return
    except json.JSONDecodeError as e:
        print(f"❌ 术语库文件格式错误: {e}")
        sys.exit(2)

    # `or` also covers an explicit JSON null, which would otherwise leave
    # None behind and break later iteration over the database.
    self.term_database = data.get('standard_terms') or {}
    self.synonym_groups = data.get('synonym_groups') or []
|
|
67
|
+
|
|
68
|
+
def extract_terms(self, text: str) -> Dict[str, List[Tuple[int, str]]]:
    """Locate standard terms and non-standard synonyms in ``text``.

    The text is scanned line by line. Every occurrence of a term is
    recorded (not just the first one on a line), so the number of entries
    per key equals the number of usages. Empty terms are skipped because
    the empty string is a substring of every line.

    NOTE(review): matching is a plain substring search, so a term that is
    contained in a longer term is also counted inside the longer one.

    Args:
        text: Patent document text.

    Returns:
        A mapping from key to a list of ``(line_number, context)`` tuples,
        with 1-based line numbers and a context of up to 20 characters on
        each side of the match. Standard terms are keyed by the term
        itself; every non-first member of a synonym group is keyed by a
        warning label that names the synonym and the suggested standard
        term (the first member of its group).
    """
    terms_found: Dict[str, List[Tuple[int, str]]] = {}

    def record(key: str, needle: str, line_no: int, line: str) -> None:
        """Append one entry under ``key`` per occurrence of ``needle`` in ``line``."""
        if not needle:
            return
        pos = line.find(needle)
        while pos != -1:
            end = pos + len(needle)
            context = line[max(0, pos - 20):end + 20]
            terms_found.setdefault(key, []).append((line_no, context))
            # Continue after this match so occurrences never overlap.
            pos = line.find(needle, end)

    for line_no, line in enumerate(text.split('\n'), 1):
        for term in self.term_database:
            record(term, term, line_no, line)

        for synonym_group in self.synonym_groups:
            if not synonym_group:
                continue
            standard_term = synonym_group[0]
            for synonym in synonym_group:
                if synonym != standard_term:
                    record(f"⚠️ {synonym} (建议: {standard_term})", synonym, line_no, line)

    return terms_found
|
|
113
|
+
|
|
114
|
+
def check_consistency(self, text: str) -> Dict:
|
|
115
|
+
"""
|
|
116
|
+
检查术语一致性
|
|
117
|
+
|
|
118
|
+
Args:
|
|
119
|
+
text: 专利文档文本
|
|
120
|
+
|
|
121
|
+
Returns:
|
|
122
|
+
检查结果字典
|
|
123
|
+
"""
|
|
124
|
+
terms_found = self.extract_terms(text)
|
|
125
|
+
|
|
126
|
+
# 分析一致性问题
|
|
127
|
+
issues = []
|
|
128
|
+
warnings = []
|
|
129
|
+
|
|
130
|
+
# 检查同义词混用
|
|
131
|
+
for synonym_group in self.synonym_groups:
|
|
132
|
+
found_variants = [term for term in synonym_group if term in text]
|
|
133
|
+
if len(found_variants) > 1:
|
|
134
|
+
standard_term = synonym_group[0]
|
|
135
|
+
issues.append({
|
|
136
|
+
'type': 'synonym_mixing',
|
|
137
|
+
'standard_term': standard_term,
|
|
138
|
+
'variants_found': found_variants,
|
|
139
|
+
'severity': 'high'
|
|
140
|
+
})
|
|
141
|
+
|
|
142
|
+
# 检查非标准术语使用
|
|
143
|
+
for key in terms_found.keys():
|
|
144
|
+
if key.startswith('⚠️'):
|
|
145
|
+
warnings.append({
|
|
146
|
+
'type': 'non_standard_term',
|
|
147
|
+
'term': key,
|
|
148
|
+
'occurrences': terms_found[key],
|
|
149
|
+
'severity': 'medium'
|
|
150
|
+
})
|
|
151
|
+
|
|
152
|
+
# 计算一致性得分
|
|
153
|
+
total_term_usages = sum(len(occurrences) for occurrences in terms_found.values())
|
|
154
|
+
inconsistent_usages = sum(len(occurrences) for key, occurrences in terms_found.items() if key.startswith('⚠️'))
|
|
155
|
+
|
|
156
|
+
if total_term_usages > 0:
|
|
157
|
+
consistency_score = 1 - (inconsistent_usages / total_term_usages)
|
|
158
|
+
else:
|
|
159
|
+
consistency_score = 1.0
|
|
160
|
+
|
|
161
|
+
# 判断是否通过
|
|
162
|
+
passed = consistency_score >= self.threshold and len(issues) == 0
|
|
163
|
+
|
|
164
|
+
return {
|
|
165
|
+
'passed': passed,
|
|
166
|
+
'consistency_score': consistency_score,
|
|
167
|
+
'threshold': self.threshold,
|
|
168
|
+
'total_term_usages': total_term_usages,
|
|
169
|
+
'inconsistent_usages': inconsistent_usages,
|
|
170
|
+
'issues': issues,
|
|
171
|
+
'warnings': warnings,
|
|
172
|
+
'terms_found': terms_found
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
def check_document(self, doc_path: str, term_db_path: str = None) -> Dict:
    """
    Check the terminology consistency of a document stored on disk.

    Args:
        doc_path: Path of the document to check; it is read as UTF-8 text.
        term_db_path: Optional path of a terminology database. When given,
            it is handed to ``self.load_terminology`` before the document
            is read.

    Returns:
        The dictionary produced by ``self.check_consistency`` on success,
        or ``{'error': <message>}`` when a file is missing or any other
        exception is raised while loading, reading or checking.
    """
    try:
        # Load the terminology database first, if one was supplied.
        if term_db_path:
            self.load_terminology(term_db_path)

        # Read the whole document into memory.
        with open(doc_path, 'r', encoding='utf-8') as f:
            text = f.read()

        return self.check_consistency(text)

    except FileNotFoundError as e:
        # The missing file is not necessarily the document: if
        # load_terminology lets FileNotFoundError propagate, the exception
        # names the terminology database instead. Report the path the
        # exception carries and fall back to doc_path when it has none.
        missing_path = e.filename if e.filename is not None else doc_path
        return {'error': f'文件不存在: {missing_path}'}
    except Exception as e:
        # Boundary handler: callers receive an error dictionary instead of
        # an exception, whatever went wrong.
        return {'error': f'处理文件时出错: {str(e)}'}
|
|
202
|
+
|
|
203
|
+
def format_report(self, result: Dict) -> str:
|
|
204
|
+
"""
|
|
205
|
+
格式化检查报告
|
|
206
|
+
|
|
207
|
+
Args:
|
|
208
|
+
result: 检查结果
|
|
209
|
+
|
|
210
|
+
Returns:
|
|
211
|
+
格式化的报告字符串
|
|
212
|
+
"""
|
|
213
|
+
if 'error' in result:
|
|
214
|
+
return f"❌ Error: {result['error']}"
|
|
215
|
+
|
|
216
|
+
score = result['consistency_score']
|
|
217
|
+
threshold = result['threshold']
|
|
218
|
+
passed = result['passed']
|
|
219
|
+
issues = result['issues']
|
|
220
|
+
warnings = result['warnings']
|
|
221
|
+
|
|
222
|
+
# 状态emoji
|
|
223
|
+
status = "✅ Pass" if passed else "❌ Fail"
|
|
224
|
+
|
|
225
|
+
# 构建报告
|
|
226
|
+
report = []
|
|
227
|
+
report.append("=" * 60)
|
|
228
|
+
report.append("Terminology Consistency Check Report")
|
|
229
|
+
report.append("=" * 60)
|
|
230
|
+
report.append("")
|
|
231
|
+
report.append(f"Consistency Score: {score * 100:.2f}% ({status} - 目标≥{threshold * 100:.0f}%)")
|
|
232
|
+
report.append(f"Total Term Usages: {result['total_term_usages']}")
|
|
233
|
+
report.append(f"Inconsistent Usages: {result['inconsistent_usages']}")
|
|
234
|
+
report.append("")
|
|
235
|
+
|
|
236
|
+
# 严重问题(同义词混用)
|
|
237
|
+
if issues:
|
|
238
|
+
report.append("❌ Critical Issues (同义词混用):")
|
|
239
|
+
report.append("-" * 60)
|
|
240
|
+
for issue in issues:
|
|
241
|
+
report.append(f"标准术语: {issue['standard_term']}")
|
|
242
|
+
report.append(f"发现变体: {', '.join(issue['variants_found'])}")
|
|
243
|
+
report.append(f"建议: 全文统一使用 '{issue['standard_term']}'")
|
|
244
|
+
report.append("")
|
|
245
|
+
|
|
246
|
+
# 警告(非标准术语)
|
|
247
|
+
if warnings:
|
|
248
|
+
report.append("⚠️ Warnings (非标准术语):")
|
|
249
|
+
report.append("-" * 60)
|
|
250
|
+
for warning in warnings[:5]: # 最多显示5个
|
|
251
|
+
term_key = warning['term']
|
|
252
|
+
occurrences = warning['occurrences']
|
|
253
|
+
report.append(f"{term_key}")
|
|
254
|
+
for line_no, context in occurrences[:3]: # 最多显示3个位置
|
|
255
|
+
report.append(f" Line {line_no}: ...{context}...")
|
|
256
|
+
report.append("")
|
|
257
|
+
|
|
258
|
+
if len(warnings) > 5:
|
|
259
|
+
report.append(f"... and {len(warnings) - 5} more warnings")
|
|
260
|
+
report.append("")
|
|
261
|
+
|
|
262
|
+
# 建议
|
|
263
|
+
report.append("Recommendations:")
|
|
264
|
+
report.append("-" * 60)
|
|
265
|
+
if passed:
|
|
266
|
+
report.append("✅ 术语一致性达标,全文术语使用规范。")
|
|
267
|
+
else:
|
|
268
|
+
report.append("⚠️ 术语一致性未达标,建议:")
|
|
269
|
+
report.append(" 1. 全文统一使用标准术语(从术语库选择)")
|
|
270
|
+
report.append(" 2. 避免同义词混用(如'联邦学习'和'分布式学习')")
|
|
271
|
+
report.append(" 3. 权利要求书与说明书术语保持一致")
|
|
272
|
+
|
|
273
|
+
report.append("")
|
|
274
|
+
report.append("=" * 60)
|
|
275
|
+
|
|
276
|
+
return '\n'.join(report)
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def create_sample_terminology():
    """Build and return a fresh sample terminology database.

    The returned dictionary has two keys: ``"standard_terms"`` maps each
    standard term to a list of descriptive strings, and ``"synonym_groups"``
    is a list of term lists whose first entry is the standard term.
    """
    standard_tag = "标准术语"
    patent_tag = "专利术语"

    # Domain terms first, then the generic patent vocabulary, so the key
    # order of the mapping is stable.
    standard_terms = {
        "联邦学习": [standard_tag, "分布式机器学习方法"],
        "用户画像数据": [standard_tag, "用户特征数据"],
        "第三方鉴权节点": [standard_tag, "可信第三方"],
        "差分隐私": [standard_tag, "隐私保护技术"],
        "模型训练": [standard_tag],
        "数据加密": [standard_tag],
    }
    for patent_term in ("权利要求", "技术方案", "有益效果"):
        standard_terms[patent_term] = [patent_tag]

    synonym_groups = [
        ["联邦学习", "分布式学习", "联合学习", "协作学习"],
        ["用户画像数据", "用户数据", "用户特征", "用户信息"],
        ["第三方鉴权节点", "可信第三方", "中间方", "第三方服务器"],
        ["差分隐私", "隐私保护", "隐私计算"],
        ["模型训练", "训练模型", "模型学习"],
        ["数据加密", "加密数据", "加密传输"],
    ]

    return {
        "standard_terms": standard_terms,
        "synonym_groups": synonym_groups,
    }
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def main():
    """Command-line entry point.

    Usage: ``python term_checker.py <patent_document_path> [terminology_database_path]``

    When no terminology database is given, the sample database from
    ``create_sample_terminology`` is written to a temporary JSON file, used
    for the check, and removed again before the process exits.

    Exit codes: 0 when the check passed, 1 when it failed or the arguments
    are missing, 2 when the result carries an error.
    """
    import os
    import tempfile

    if len(sys.argv) < 2:
        print("Usage: python term_checker.py <patent_document_path> [terminology_database_path]")
        print("")
        print("Example:")
        print(" python term_checker.py /path/to/patent.txt /path/to/terminology.json")
        print("")
        print("如果未提供术语库,将使用默认术语库。")
        sys.exit(1)

    doc_path = sys.argv[1]
    term_db_path = sys.argv[2] if len(sys.argv) > 2 else None

    # Path of the temporary default database, if this run creates one.
    temp_db_path = None
    try:
        # No database supplied: materialize the built-in sample as a file.
        if not term_db_path:
            sample_term = create_sample_terminology()
            with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False, encoding='utf-8') as f:
                # Record the name first so a failed dump is still cleaned up.
                temp_db_path = f.name
                json.dump(sample_term, f, ensure_ascii=False, indent=2)
            term_db_path = temp_db_path
            print("⚠️ 未提供术语库,使用默认术语库(联邦学习领域)")
            print("")

        checker = TermChecker(threshold=0.90)

        print(f"Checking document: {doc_path}")
        if term_db_path:
            print(f"Terminology database: {term_db_path}")
        print("")

        result = checker.check_document(doc_path, term_db_path)

        report = checker.format_report(result)
        print(report)
    finally:
        # delete=False leaves the file on disk, so remove it explicitly.
        # Cleanup is best-effort: a failure here must not mask the result.
        if temp_db_path is not None:
            try:
                os.remove(temp_db_path)
            except OSError:
                pass

    # Translate the outcome into the process exit code.
    if 'error' in result:
        sys.exit(2)
    elif not result['passed']:
        sys.exit(1)
    else:
        sys.exit(0)


if __name__ == '__main__':
    main()
|