article-tagger 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. article_tagger-0.4.0/.claude/settings.local.json +35 -0
  2. article_tagger-0.4.0/.claude/skills/article-tagger/skill.md +75 -0
  3. article_tagger-0.4.0/.claude/skills/atag/skill.md +98 -0
  4. article_tagger-0.4.0/.dockerignore +13 -0
  5. article_tagger-0.4.0/.env.example +34 -0
  6. article_tagger-0.4.0/.github/workflows/ci.yml +44 -0
  7. article_tagger-0.4.0/.gitignore +17 -0
  8. article_tagger-0.4.0/.mcp.json +14 -0
  9. article_tagger-0.4.0/51標籤庫.json +2446 -0
  10. article_tagger-0.4.0/AGENTS.md +134 -0
  11. article_tagger-0.4.0/CHANGELOG.md +81 -0
  12. article_tagger-0.4.0/CLAUDE.md +152 -0
  13. article_tagger-0.4.0/Dockerfile +43 -0
  14. article_tagger-0.4.0/PKG-INFO +353 -0
  15. article_tagger-0.4.0/README.md +314 -0
  16. article_tagger-0.4.0/docker-compose.yml +20 -0
  17. article_tagger-0.4.0/examples/benchmark_sample.json +22 -0
  18. article_tagger-0.4.0/package-lock.json +1198 -0
  19. article_tagger-0.4.0/package.json +4 -0
  20. article_tagger-0.4.0/packages/tool-schemas/anthropic.json +78 -0
  21. article_tagger-0.4.0/packages/tool-schemas/openai.json +94 -0
  22. article_tagger-0.4.0/plugins/example_length_filter.py +27 -0
  23. article_tagger-0.4.0/pyproject.toml +69 -0
  24. article_tagger-0.4.0/src/article_tagger/__init__.py +15 -0
  25. article_tagger-0.4.0/src/article_tagger/active_learning.py +168 -0
  26. article_tagger-0.4.0/src/article_tagger/agent_cli.py +266 -0
  27. article_tagger-0.4.0/src/article_tagger/analytics.py +109 -0
  28. article_tagger-0.4.0/src/article_tagger/api.py +327 -0
  29. article_tagger-0.4.0/src/article_tagger/booster.py +191 -0
  30. article_tagger-0.4.0/src/article_tagger/cache.py +123 -0
  31. article_tagger-0.4.0/src/article_tagger/chunker.py +26 -0
  32. article_tagger-0.4.0/src/article_tagger/cli.py +1118 -0
  33. article_tagger-0.4.0/src/article_tagger/config.py +74 -0
  34. article_tagger-0.4.0/src/article_tagger/cooccurrence.py +96 -0
  35. article_tagger-0.4.0/src/article_tagger/daemon.py +210 -0
  36. article_tagger-0.4.0/src/article_tagger/embedder.py +56 -0
  37. article_tagger-0.4.0/src/article_tagger/evaluation.py +168 -0
  38. article_tagger-0.4.0/src/article_tagger/exporter.py +99 -0
  39. article_tagger-0.4.0/src/article_tagger/hierarchy.py +125 -0
  40. article_tagger-0.4.0/src/article_tagger/indexer.py +111 -0
  41. article_tagger-0.4.0/src/article_tagger/mcp_server.py +353 -0
  42. article_tagger-0.4.0/src/article_tagger/middleware.py +34 -0
  43. article_tagger-0.4.0/src/article_tagger/models.py +115 -0
  44. article_tagger-0.4.0/src/article_tagger/pipeline.py +155 -0
  45. article_tagger-0.4.0/src/article_tagger/profiles.py +124 -0
  46. article_tagger-0.4.0/src/article_tagger/repl.py +174 -0
  47. article_tagger-0.4.0/src/article_tagger/reranker.py +92 -0
  48. article_tagger-0.4.0/src/article_tagger/retry.py +37 -0
  49. article_tagger-0.4.0/src/article_tagger/state_manager.py +278 -0
  50. article_tagger-0.4.0/src/article_tagger/store.py +163 -0
  51. article_tagger-0.4.0/src/article_tagger/tag_discovery.py +154 -0
  52. article_tagger-0.4.0/src/article_tagger/tag_enricher.py +144 -0
  53. article_tagger-0.4.0/src/article_tagger/tag_loader.py +73 -0
  54. article_tagger-0.4.0/src/article_tagger/tag_quality.py +112 -0
  55. article_tagger-0.4.0/src/article_tagger/tagger.py +321 -0
  56. article_tagger-0.4.0/src/article_tagger/text_augmenter.py +145 -0
  57. article_tagger-0.4.0/src/article_tagger/ui.py +202 -0
  58. article_tagger-0.4.0/src/article_tagger/visualizer.py +139 -0
  59. article_tagger-0.4.0/src/article_tagger/watcher.py +96 -0
  60. article_tagger-0.4.0/test-data/articles/01_muscle.txt +1 -0
  61. article_tagger-0.4.0/test-data/articles/02_wings.txt +1 -0
  62. article_tagger-0.4.0/test-data/articles/03_mecha.txt +1 -0
  63. article_tagger-0.4.0/test-data/articles/04_animal_ears.txt +1 -0
  64. article_tagger-0.4.0/test-data/articles/05_milf.txt +1 -0
  65. article_tagger-0.4.0/test-data/articles/06_transparent.txt +1 -0
  66. article_tagger-0.4.0/test-data/articles/07_multi_arm.txt +1 -0
  67. article_tagger-0.4.0/test-data/articles/08_anthropomorphize.txt +1 -0
  68. article_tagger-0.4.0/test-data/articles/09_age_regression.txt +1 -0
  69. article_tagger-0.4.0/test-data/articles/10_conjoined.txt +1 -0
  70. article_tagger-0.4.0/test-data/articles-v2/01_bunny_girl.txt +1 -0
  71. article_tagger-0.4.0/test-data/articles-v2/02_demon_girl.txt +1 -0
  72. article_tagger-0.4.0/test-data/articles-v2/03_centaur.txt +1 -0
  73. article_tagger-0.4.0/test-data/articles-v2/04_petrify.txt +1 -0
  74. article_tagger-0.4.0/test-data/articles-v2/05_cow_girl.txt +1 -0
  75. article_tagger-0.4.0/test-data/articles-v2/06_ghost.txt +1 -0
  76. article_tagger-0.4.0/test-data/articles-v2/07_furry.txt +1 -0
  77. article_tagger-0.4.0/test-data/articles-v2/08_fairy.txt +1 -0
  78. article_tagger-0.4.0/test-data/articles-v2/09_stretch.txt +1 -0
  79. article_tagger-0.4.0/test-data/articles-v2/10_transform.txt +1 -0
  80. article_tagger-0.4.0/test-data/articles-v3/01_bikini.txt +1 -0
  81. article_tagger-0.4.0/test-data/articles-v3/02_sportswear.txt +1 -0
  82. article_tagger-0.4.0/test-data/articles-v3/03_corset.txt +1 -0
  83. article_tagger-0.4.0/test-data/articles-v3/04_pregnant.txt +1 -0
  84. article_tagger-0.4.0/test-data/articles-v3/05_giantess.txt +1 -0
  85. article_tagger-0.4.0/test-data/articles-v3/06_zombie.txt +1 -0
  86. article_tagger-0.4.0/test-data/articles-v3/07_mecha_girl.txt +1 -0
  87. article_tagger-0.4.0/test-data/articles-v3/08_body_paint.txt +1 -0
  88. article_tagger-0.4.0/test-data/articles-v3/09_spiral_hair.txt +1 -0
  89. article_tagger-0.4.0/test-data/articles-v3/10_sarashi.txt +1 -0
  90. article_tagger-0.4.0/test-data/results-v2.json +372 -0
  91. article_tagger-0.4.0/test-data/results.json +372 -0
  92. article_tagger-0.4.0/tests/__init__.py +0 -0
  93. article_tagger-0.4.0/tests/conftest.py +106 -0
  94. article_tagger-0.4.0/tests/test_active_learning.py +128 -0
  95. article_tagger-0.4.0/tests/test_analytics.py +49 -0
  96. article_tagger-0.4.0/tests/test_api.py +123 -0
  97. article_tagger-0.4.0/tests/test_booster.py +170 -0
  98. article_tagger-0.4.0/tests/test_cache.py +116 -0
  99. article_tagger-0.4.0/tests/test_cli.py +56 -0
  100. article_tagger-0.4.0/tests/test_cooccurrence.py +47 -0
  101. article_tagger-0.4.0/tests/test_daemon.py +24 -0
  102. article_tagger-0.4.0/tests/test_enricher.py +67 -0
  103. article_tagger-0.4.0/tests/test_evaluation.py +62 -0
  104. article_tagger-0.4.0/tests/test_exporter.py +64 -0
  105. article_tagger-0.4.0/tests/test_hierarchy.py +114 -0
  106. article_tagger-0.4.0/tests/test_indexer.py +80 -0
  107. article_tagger-0.4.0/tests/test_integration.py +143 -0
  108. article_tagger-0.4.0/tests/test_pipeline.py +74 -0
  109. article_tagger-0.4.0/tests/test_profiles.py +57 -0
  110. article_tagger-0.4.0/tests/test_store.py +139 -0
  111. article_tagger-0.4.0/tests/test_tag_discovery.py +100 -0
  112. article_tagger-0.4.0/tests/test_tag_quality.py +58 -0
  113. article_tagger-0.4.0/tests/test_text_augmenter.py +98 -0
  114. article_tagger-0.4.0/tests/test_visualizer.py +61 -0
  115. article_tagger-0.4.0/tests/test_watcher.py +72 -0
  116. article_tagger-0.4.0/uv.lock +3419 -0
@@ -0,0 +1,35 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(pip install:*)",
5
+ "Bash(pip3 install:*)",
6
+ "Bash(uv venv:*)",
7
+ "Bash(source .venv/bin/activate)",
8
+ "Bash(uv pip:*)",
9
+ "Bash(\"/Users/dex/YD 2026/0323/Article Tagger/.venv/bin/python\" --version)",
10
+ "Bash(export KMP_DUPLICATE_LIB_OK=TRUE)",
11
+ "Bash(.venv/bin/pip install:*)",
12
+ "Bash(.venv/bin/python -m pytest tests/ -v)",
13
+ "Bash(.venv/bin/article-tagger --help)",
14
+ "Bash(.venv/bin/article-tagger state:*)",
15
+ "Bash(.venv/bin/article-tagger build-index:*)",
16
+ "Bash(.venv/bin/article-tagger tag:*)",
17
+ "Bash(ls:*)",
18
+ "Bash(.venv/bin/article-tagger model:*)",
19
+ "Bash(.venv/bin/article-tagger analytics:*)",
20
+ "Bash(.venv/bin/article-tagger quality:*)",
21
+ "Bash(.venv/bin/article-tagger profile:*)",
22
+ "Bash(.venv/bin/article-tagger weights:*)",
23
+ "Bash(.venv/bin/article-tagger visualize:*)",
24
+ "Bash(.venv/bin/python -m pytest tests/ -q)",
25
+ "Bash(xargs -I{} basename {})",
26
+ "Bash(gh auth:*)",
27
+ "Bash(gh repo:*)",
28
+ "Bash(python3 -c \":*)",
29
+ "Bash(.venv/bin/python -c \"import mcp; print\\(mcp.__version__\\)\")",
30
+ "Bash(.venv/bin/python -c \"from mcp.server.fastmcp import FastMCP; print\\(''OK''\\)\")",
31
+ "Bash(.venv/bin/python -m pytest tests/ -v --tb=short)",
32
+ "Bash(.venv/bin/article-tagger eval:*)"
33
+ ]
34
+ }
35
+ }
@@ -0,0 +1,75 @@
1
+ ---
2
+ name: article-tagger
3
+ description: Article Tagger v0.4 — 文章標籤標註、標籤庫管理、評估、狀態遷移、標籤發現、daemon 模式。
4
+ user_invocable: true
5
+ ---
6
+
7
+ # Article Tagger Skill
8
+
9
+ 你現在是 Article Tagger 操作員。根據需求使用 CLI 或 MCP 工具完成任務。
10
+
11
+ ## 快速指令
12
+
13
+ ```bash
14
+ article-tagger build-index --tags <file> # 建索引
15
+ article-tagger tag --text "..." --top-k 5 # 標註
16
+ article-tagger tag --text "..." --rerank # LLM 精排
17
+ article-tagger interactive # 互動 REPL
18
+ article-tagger watch --dir ./articles/ # 監控目錄
19
+ article-tagger analytics # 分析儀表板
20
+ article-tagger quality # 標籤品質
21
+ article-tagger visualize -o tags.html # 視覺化
22
+ article-tagger cooccur --rebuild # 共現圖
23
+ article-tagger weights # AL 權重
24
+ article-tagger enrich -i tags.json # LLM 增強標籤庫
25
+ article-tagger eval -b benchmark.json # 評估
26
+ article-tagger export --dir ./articles/ --fmt markdown # 匯出
27
+ article-tagger discover ./articles/ # 發現新標籤候選
28
+ article-tagger warmup # 預載模型+索引
29
+ ```
30
+
31
+ ## Daemon(免重載模型)
32
+ ```bash
33
+ article-tagger daemon start
34
+ article-tagger daemon tag --text "..."
35
+ article-tagger daemon stop
36
+ ```
37
+
38
+ ## 狀態遷移
39
+ ```bash
40
+ article-tagger state export -o bundle.tar.gz
41
+ article-tagger state import -f bundle.tar.gz
42
+ article-tagger state export-incr -o incr.tar.gz --since <ISO>
43
+ article-tagger state merge -f incr.tar.gz
44
+ ```
45
+
46
+ ## 模型 / Profile
47
+ ```bash
48
+ article-tagger model list | switch | compare
49
+ article-tagger profile list | save | load | delete
50
+ ```
51
+
52
+ ## 標籤發現
53
+ ```bash
54
+ # 掃描目錄,發現未在標籤庫中的高頻詞彙
55
+ article-tagger discover ./articles/ --min-freq 3 --top 10
56
+ article-tagger discover ./articles/ --json # JSON 格式輸出
57
+ ```
58
+
59
+ ## MCP 工具(25 個)
60
+ **標註**: tag_article, build_index, search_tags
61
+ **標籤 CRUD**: list_tags, create_tag, update_tag, delete_tag
62
+ **回饋**: add_feedback, undo_feedback, delete_feedback, get_weights
63
+ **歷史**: search_history, delete_history, export_history
64
+ **分析**: get_stats, get_analytics, get_quality_report
65
+ **狀態**: export_state, import_state
66
+ **模型**: switch_model, list_models
67
+ **其他**: list_profiles, get_pipeline_info, suggest_related_tags, discover_tags
68
+
69
+ ## 使用原則
70
+ 1. MCP 可用 → 用 MCP tools;不可用 → 降級 CLI
71
+ 2. Daemon 啟動時 → `daemon tag` 比直接 `tag` 快 100 倍
72
+ 3. 首次使用先建索引: `build-index --tags 51標籤庫.json`
73
+ 4. LLM 功能需要 `TAGGER_ANTHROPIC_API_KEY`
74
+ 5. `warmup` 預載模型到記憶體,加速首次查詢
75
+ 6. `discover` 發現新標籤候選,半自動擴展標籤庫
@@ -0,0 +1,98 @@
1
+ ---
2
+ name: atag
3
+ description: Agent-native article tagging — JSON-only CLI for automated tagging workflows, feedback loops, and tag management.
4
+ user_invocable: true
5
+ ---
6
+
7
+ # atag — Agent Article Tagger
8
+
9
+ 你現在是一個自動標籤 agent。使用 `atag` CLI 完成標籤相關任務。所有輸出都是 JSON。
10
+
11
+ ## 核心工作流程
12
+
13
+ ### 1. 標註文章
14
+ ```bash
15
+ # 直接傳文字
16
+ atag tag "文章內容..."
17
+
18
+ # 從檔案
19
+ atag tag -f article.txt
20
+
21
+ # Pipe(適合批次腳本)
22
+ echo "文章內容" | atag tag
23
+
24
+ # 帶上下文 ID(會原樣回傳)
25
+ atag tag "文章" -c "request-123"
26
+
27
+ # 自訂 top_k + 門檻
28
+ atag tag "文章" -k 10 -t 0.3
29
+ ```
30
+
31
+ ### 2. 批次標註
32
+ ```bash
33
+ # 目錄
34
+ atag batch -d ./articles/
35
+
36
+ # 指定檔案
37
+ atag batch file1.txt file2.txt
38
+
39
+ # JSONL stdin(每行一個 {"text": "...", "id": "..."})
40
+ cat articles.jsonl | atag batch
41
+ ```
42
+
43
+ ### 3. 回饋 → Active Learning
44
+ ```bash
45
+ atag feedback "兔女郎" true # 正確
46
+ atag feedback "高叉裝" false # 錯誤
47
+ ```
48
+
49
+ ### 4. 推薦相關標籤
50
+ ```bash
51
+ atag suggest "貓娘,女僕"
52
+ ```
53
+
54
+ ### 5. 搜尋標籤庫
55
+ ```bash
56
+ atag search "狐狸"
57
+ ```
58
+
59
+ ### 6. 系統狀態
60
+ ```bash
61
+ atag stats
62
+ atag quality
63
+ ```
64
+
65
+ ### 7. Tool Schema 匯出
66
+ ```bash
67
+ atag schema openai # OpenAI function calling
68
+ atag schema anthropic # Anthropic tool use
69
+ ```
70
+
71
+ ## Agent Chaining 範例
72
+
73
+ ```bash
74
+ # 標註 → 提取 top tag → 推薦相關標籤
75
+ TAG=$(atag tag "文章" | jq -r '.tags[0].tag_name')
76
+ atag suggest "$TAG"
77
+
78
+ # 批次標註 → 過濾低信心 → 匯出
79
+ atag batch -d ./articles/ | jq '.results[] | .tags |= map(select(.score > 0.5))'
80
+
81
+ # 標註 + 自動回饋迴圈
82
+ RESULT=$(atag tag "文章")
83
+ echo "$RESULT" | jq -r '.tags[] | .tag_name' | while read tag; do
84
+ # Agent 判斷是否正確後回饋
85
+ atag feedback "$tag" true
86
+ done
87
+ ```
88
+
89
+ ## 錯誤處理
90
+ - exit 0 = 成功(有結果)
91
+ - exit 1 = 錯誤(JSON error 在 stderr)
92
+ - exit 2 = 成功但無結果(空 tags)
93
+
94
+ ## 規則
95
+ 1. **永遠用 `atag`**(不是 `article-tagger`)— 更短、純 JSON、agent-native
96
+ 2. **所有輸出都是 JSON** — 直接 pipe 到 `jq` 或程式解析
97
+ 3. **先 `atag stats` 檢查** — 確認 index 已載入再標註
98
+ 4. **回饋很重要** — 每次標註後盡量給回饋,系統會越來越準
@@ -0,0 +1,13 @@
1
+ .venv
2
+ __pycache__
3
+ *.pyc
4
+ .pytest_cache
5
+ .git
6
+ .claude
7
+ .agents
8
+ .codex
9
+ dev/
10
+ instincts/
11
+ tests/
12
+ *.egg-info
13
+ .DS_Store
@@ -0,0 +1,34 @@
1
+ # Article Tagger 環境變數
2
+ # 複製此檔案為 .env 並修改
3
+
4
+ # Embedding 模型
5
+ TAGGER_MODEL_NAME=paraphrase-multilingual-MiniLM-L12-v2
6
+
7
+ # 標註設定
8
+ TAGGER_TOP_K_RETURN=3
9
+ TAGGER_SIMILARITY_THRESHOLD=0.1
10
+ TAGGER_ENABLE_HIERARCHY=true
11
+ TAGGER_ENABLE_SYNONYMS=true
12
+ TAGGER_ENABLE_CHUNKING=true
13
+
14
+ # LLM Reranker(可選)
15
+ TAGGER_ENABLE_RERANKER=false
16
+ TAGGER_ANTHROPIC_API_KEY=
17
+
18
+ # API
19
+ TAGGER_API_HOST=127.0.0.1
20
+ TAGGER_API_PORT=8000
21
+ TAGGER_API_KEY=
22
+
23
+ # Rate Limiting
24
+ TAGGER_RATE_LIMIT_MAX=60
25
+ TAGGER_RATE_LIMIT_WINDOW=60
26
+
27
+ # 路徑
28
+ TAGGER_INDEX_DIR=data/index
29
+ TAGGER_DATA_DIR=data
30
+ TAGGER_PLUGINS_DIR=plugins
31
+
32
+ # 其他
33
+ KMP_DUPLICATE_LIB_OK=TRUE
34
+ TAGGER_LOG_LEVEL=info
@@ -0,0 +1,44 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ python-version: ["3.11", "3.12"]
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Set up Python
20
+ uses: actions/setup-python@v5
21
+ with:
22
+ python-version: ${{ matrix.python-version }}
23
+
24
+ - name: Install uv
25
+ uses: astral-sh/setup-uv@v4
26
+
27
+ - name: Install dependencies
28
+ run: |
29
+ uv venv .venv
30
+ uv pip install -e ".[dev]" --python .venv/bin/python
31
+
32
+ - name: Run tests
33
+ env:
34
+ KMP_DUPLICATE_LIB_OK: "TRUE"
35
+ OPENBLAS_NUM_THREADS: "1"
36
+ OMP_NUM_THREADS: "1"
37
+ run: |
38
+ .venv/bin/python -m pytest tests/ -v --tb=short -x --timeout=120
39
+
40
+ - name: Check CLI loads
41
+ env:
42
+ KMP_DUPLICATE_LIB_OK: "TRUE"
43
+ run: |
44
+ .venv/bin/article-tagger --help
@@ -0,0 +1,17 @@
1
+ .venv/
2
+ __pycache__/
3
+ *.pyc
4
+ .pytest_cache/
5
+ data/index/
6
+ data/feedback.json
7
+ data/history.json
8
+ data/cooccurrence.json
9
+ data/tag_weights.json
10
+ data/profiles/
11
+ *.egg-info/
12
+ .DS_Store
13
+ .env
14
+ dist/
15
+ build/
16
+ node_modules/
17
+ packages/*/dist/
@@ -0,0 +1,14 @@
1
+ {
2
+ "mcpServers": {
3
+ "sequential-thinking": {
4
+ "command": "npx",
5
+ "args": ["@modelcontextprotocol/server-sequential-thinking"],
6
+ "type": "stdio"
7
+ },
8
+ "article-tagger": {
9
+ "command": "python",
10
+ "args": ["-m", "article_tagger.mcp_server"],
11
+ "type": "stdio"
12
+ }
13
+ }
14
+ }