code2skill 0.1.0__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. code2skill-0.1.2/MANIFEST.in +6 -0
  2. code2skill-0.1.2/PKG-INFO +497 -0
  3. code2skill-0.1.2/README.md +465 -0
  4. {code2skill-0.1.0 → code2skill-0.1.2}/pyproject.toml +18 -3
  5. code2skill-0.1.2/src/code2skill/__init__.py +39 -0
  6. code2skill-0.1.2/src/code2skill/adapt.py +94 -0
  7. code2skill-0.1.2/src/code2skill/analyzers/__init__.py +1 -0
  8. code2skill-0.1.2/src/code2skill/analyzers/project_classifier.py +161 -0
  9. code2skill-0.1.2/src/code2skill/analyzers/rules_analyzer.py +158 -0
  10. code2skill-0.1.2/src/code2skill/analyzers/skill_blueprint_builder.py +315 -0
  11. code2skill-0.1.2/src/code2skill/analyzers/workflow_analyzer.py +49 -0
  12. code2skill-0.1.2/src/code2skill/cli.py +334 -0
  13. code2skill-0.1.2/src/code2skill/config.py +203 -0
  14. code2skill-0.1.2/src/code2skill/core.py +1060 -0
  15. code2skill-0.1.2/src/code2skill/costing.py +259 -0
  16. code2skill-0.1.2/src/code2skill/extractors/__init__.py +1 -0
  17. code2skill-0.1.2/src/code2skill/extractors/base.py +11 -0
  18. code2skill-0.1.2/src/code2skill/extractors/config_extractor.py +137 -0
  19. code2skill-0.1.2/src/code2skill/extractors/python_extractor.py +331 -0
  20. code2skill-0.1.2/src/code2skill/git_client.py +276 -0
  21. code2skill-0.1.2/src/code2skill/impact.py +108 -0
  22. code2skill-0.1.2/src/code2skill/import_graph.py +131 -0
  23. code2skill-0.1.2/src/code2skill/json_utils.py +242 -0
  24. code2skill-0.1.2/src/code2skill/llm_backend.py +222 -0
  25. code2skill-0.1.2/src/code2skill/models.py +366 -0
  26. code2skill-0.1.2/src/code2skill/pattern_detector.py +300 -0
  27. code2skill-0.1.2/src/code2skill/python_imports.py +103 -0
  28. code2skill-0.1.2/src/code2skill/renderers/__init__.py +1 -0
  29. code2skill-0.1.2/src/code2skill/renderers/json_renderer.py +11 -0
  30. code2skill-0.1.2/src/code2skill/renderers/markdown_renderer.py +180 -0
  31. code2skill-0.1.2/src/code2skill/scanner/__init__.py +1 -0
  32. code2skill-0.1.2/src/code2skill/scanner/budget.py +35 -0
  33. code2skill-0.1.2/src/code2skill/scanner/detector.py +49 -0
  34. code2skill-0.1.2/src/code2skill/scanner/filters.py +208 -0
  35. code2skill-0.1.2/src/code2skill/scanner/prioritizer.py +220 -0
  36. code2skill-0.1.2/src/code2skill/scanner/repository.py +211 -0
  37. code2skill-0.1.2/src/code2skill/skill_generator.py +911 -0
  38. code2skill-0.1.2/src/code2skill/skill_planner.py +332 -0
  39. code2skill-0.1.2/src/code2skill/state_store.py +193 -0
  40. code2skill-0.1.2/src/code2skill.egg-info/PKG-INFO +497 -0
  41. code2skill-0.1.2/src/code2skill.egg-info/SOURCES.txt +46 -0
  42. code2skill-0.1.2/src/code2skill.egg-info/requires.txt +8 -0
  43. code2skill-0.1.0/PKG-INFO +0 -80
  44. code2skill-0.1.0/README.md +0 -55
  45. code2skill-0.1.0/src/code2skill/__init__.py +0 -7
  46. code2skill-0.1.0/src/code2skill/cli.py +0 -61
  47. code2skill-0.1.0/src/code2skill/core.py +0 -23
  48. code2skill-0.1.0/src/code2skill.egg-info/PKG-INFO +0 -80
  49. code2skill-0.1.0/src/code2skill.egg-info/SOURCES.txt +0 -12
  50. {code2skill-0.1.0 → code2skill-0.1.2}/LICENSE +0 -0
  51. {code2skill-0.1.0 → code2skill-0.1.2}/setup.cfg +0 -0
  52. {code2skill-0.1.0 → code2skill-0.1.2}/src/code2skill/__main__.py +0 -0
  53. {code2skill-0.1.0 → code2skill-0.1.2}/src/code2skill.egg-info/dependency_links.txt +0 -0
  54. {code2skill-0.1.0 → code2skill-0.1.2}/src/code2skill.egg-info/entry_points.txt +0 -0
  55. {code2skill-0.1.0 → code2skill-0.1.2}/src/code2skill.egg-info/top_level.txt +0 -0
@@ -0,0 +1,6 @@
1
+ include LICENSE
2
+ include README.md
3
+ prune .code2skill
4
+ prune tests
5
+ prune tests/__pycache__
6
+ global-exclude __pycache__ *.py[cod]
@@ -0,0 +1,497 @@
1
+ Metadata-Version: 2.4
2
+ Name: code2skill
3
+ Version: 0.1.2
4
+ Summary: Turn a code repository into structured AI skill context.
5
+ Author: OceanusXXD
6
+ License-Expression: Apache-2.0
7
+ Project-URL: Homepage, https://github.com/oceanusXXD/code2skill
8
+ Project-URL: Repository, https://github.com/oceanusXXD/code2skill
9
+ Project-URL: Documentation, https://github.com/oceanusXXD/code2skill#readme
10
+ Project-URL: Issues, https://github.com/oceanusXXD/code2skill/issues
11
+ Keywords: ai,cli,developer-tools,skills
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Environment :: Console
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3 :: Only
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Software Development :: Build Tools
22
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
23
+ Requires-Python: >=3.10
24
+ Description-Content-Type: text/markdown
25
+ License-File: LICENSE
26
+ Requires-Dist: tomli>=2.0.1; python_version < "3.11"
27
+ Provides-Extra: dev
28
+ Requires-Dist: build>=1.2; extra == "dev"
29
+ Requires-Dist: pytest>=8.0; extra == "dev"
30
+ Requires-Dist: twine>=6.0; extra == "dev"
31
+ Dynamic: license-file
32
+
33
+ # code2skill
34
+
35
+ 中文优先,后附英文快速说明。
36
+ Chinese first, with an English quick reference at the end.
37
+
38
+ `code2skill` 是一个面向 Python 仓库的 CLI。它会把真实代码仓库编译成一组结构化项目知识和 Skill 文档,供 Cursor、Claude Code、Codex、Copilot、Windsurf 等 AI 编程助手消费。
39
+
40
+ 它的目标不是“总结仓库”,而是生成能直接用于后续编码、审查和增量更新的高密度上下文。
41
+
42
+ ## 适用范围
43
+
44
+ - 当前只面向 Python 仓库
45
+ - Phase 1 不调用 LLM
46
+ - Python 源码使用 `ast` 做结构提取
47
+ - 支持 `scan`、`estimate`、`ci`、`adapt`
48
+ - 支持 `openai`、`claude`、`qwen`
49
+ - 默认使用英文 prompt 和英文 Skill 输出,不使用 emoji,证据不足处标记 `[Needs confirmation]`
50
+
51
+ ## 核心特性
52
+
53
+ - 结构扫描:目录发现、过滤、预算裁剪、Python 骨架提取
54
+ - 结构分析:import graph、角色修正、模式检测、抽象规则提炼
55
+ - Skill 规划:用 1 次 LLM 调用决定生成哪些 Skill、每个 Skill 读哪些文件
56
+ - Skill 生成:按 Skill 聚焦上下文逐个生成高质量 Markdown
57
+ - 增量更新:在 CI 中根据 Git diff 只重写受影响的 Skill
58
+ - 目标适配:把 `skills/*.md` 复制或合并到 Cursor / Codex / Claude 等约定位置
59
+
60
+ ## 30 秒上手
61
+
62
+ 先设置模型环境变量:
63
+
64
+ ```bash
65
+ export QWEN_API_KEY=...
66
+ export CODE2SKILL_LLM=qwen
67
+ export CODE2SKILL_MODEL=qwen-plus-latest
68
+ ```
69
+
70
+ PowerShell:
71
+
72
+ ```powershell
73
+ $env:QWEN_API_KEY="..."
74
+ $env:CODE2SKILL_LLM="qwen"
75
+ $env:CODE2SKILL_MODEL="qwen-plus-latest"
76
+ ```
77
+
78
+ 进入要分析的仓库目录后直接运行:
79
+
80
+ ```bash
81
+ code2skill scan
82
+ ```
83
+
84
+ 现在 `repo_path` 默认就是当前目录,所以在仓库根目录里不需要再写 `.`。
85
+
86
+ 如果只想先做结构扫描:
87
+
88
+ ```bash
89
+ code2skill scan --structure-only
90
+ ```
91
+
92
+ 如果已经有历史状态,想走自动增量:
93
+
94
+ ```bash
95
+ code2skill ci --mode auto
96
+ ```
97
+
98
+ ## 安装
99
+
100
+ 发布版:
101
+
102
+ ```bash
103
+ pip install code2skill
104
+ ```
105
+
106
+ 开发版:
107
+
108
+ ```bash
109
+ pip install -e ".[dev]"
110
+ ```
111
+
112
+ 命令入口:
113
+
114
+ ```bash
115
+ code2skill --help
116
+ python -m code2skill --help
117
+ ```
118
+
119
+ ## 常用环境变量
120
+
121
+ 这些变量是为了让本地和 CI 使用更短的命令。
122
+
123
+ LLM API Key:
124
+
125
+ ```bash
126
+ export OPENAI_API_KEY=...
127
+ export ANTHROPIC_API_KEY=...
128
+ export QWEN_API_KEY=...
129
+ ```
130
+
131
+ PowerShell:
132
+
133
+ ```powershell
134
+ $env:OPENAI_API_KEY="..."
135
+ $env:ANTHROPIC_API_KEY="..."
136
+ $env:QWEN_API_KEY="..."
137
+ ```
138
+
139
+ CLI 默认值:
140
+
141
+ ```bash
142
+ export CODE2SKILL_LLM=qwen
143
+ export CODE2SKILL_MODEL=qwen-plus-latest
144
+ export CODE2SKILL_OUTPUT_DIR=.code2skill
145
+ export CODE2SKILL_MAX_SKILLS=6
146
+ export CODE2SKILL_BASE_REF=origin/main
147
+ ```
148
+
149
+ PowerShell:
150
+
151
+ ```powershell
152
+ $env:CODE2SKILL_LLM="qwen"
153
+ $env:CODE2SKILL_MODEL="qwen-plus-latest"
154
+ $env:CODE2SKILL_OUTPUT_DIR=".code2skill"
155
+ $env:CODE2SKILL_MAX_SKILLS="6"
156
+ $env:CODE2SKILL_BASE_REF="origin/main"
157
+ ```
158
+
159
+ 说明:
160
+
161
+ - `qwen` 默认走阿里国际站兼容接口
162
+ - `qwen` 会读取 `QWEN_API_KEY`,也兼容 `DASHSCOPE_API_KEY`
163
+ - 如果没有配置对应 API key,命令会直接报错,不会静默降级
164
+
165
+ ## 命令速查
166
+
167
+ 完整扫描并生成 Skill:
168
+
169
+ ```bash
170
+ code2skill scan --llm qwen --model qwen-plus-latest
171
+ ```
172
+
173
+ 只做结构扫描:
174
+
175
+ ```bash
176
+ code2skill scan --structure-only
177
+ ```
178
+
179
+ 自动增量:
180
+
181
+ ```bash
182
+ code2skill ci --mode auto --base-ref origin/main
183
+ ```
184
+
185
+ 只做成本预估:
186
+
187
+ ```bash
188
+ code2skill estimate
189
+ ```
190
+
191
+ 把 Skill 合并到 Codex 规则文件:
192
+
193
+ ```bash
194
+ code2skill adapt --target codex --source-dir .code2skill/skills
195
+ ```
196
+
197
+ 适配所有目标:
198
+
199
+ ```bash
200
+ code2skill adapt --target all --source-dir .code2skill/skills
201
+ ```
202
+
203
+ ## 工作流说明
204
+
205
+ ### Phase 1:结构扫描
206
+
207
+ 输入:
208
+
209
+ - 仓库路径
210
+
211
+ 输出:
212
+
213
+ - `project-summary.md`
214
+ - `skill-blueprint.json`
215
+ - `references/architecture.md`
216
+ - `references/code-style.md`
217
+ - `references/workflows.md`
218
+ - `references/api-usage.md`
219
+ - `report.json`
220
+ - `state/analysis-state.json`
221
+
222
+ 主要步骤:
223
+
224
+ 1. 文件发现与过滤
225
+ 2. 粗评分与预算裁剪
226
+ 3. Python AST 骨架提取
227
+ 4. import graph 构建
228
+ 5. 基于结构信号修正优先级和角色
229
+ 6. 模式检测与抽象规则提炼
230
+ 7. 组装 `SkillBlueprint`
231
+
232
+ ### Phase 2:Skill 规划
233
+
234
+ 输入:
235
+
236
+ - `skill-blueprint.json`
237
+
238
+ 输出:
239
+
240
+ - `skill-plan.json`
241
+
242
+ 主要步骤:
243
+
244
+ 1. 压缩项目画像、目录摘要、依赖簇、核心模块、规则和流程
245
+ 2. 调用 1 次 LLM
246
+ 3. 决定要生成哪些 Skill
247
+ 4. 为每个 Skill 选出最值得阅读的文件集合
248
+
249
+ ### Phase 3:Skill 生成
250
+
251
+ 输入:
252
+
253
+ - `skill-plan.json`
254
+ - 每个 Skill 对应的文件正文或骨架
255
+
256
+ 输出:
257
+
258
+ - `skills/index.md`
259
+ - `skills/*.md`
260
+
261
+ 主要步骤:
262
+
263
+ 1. 按 Skill 收集上下文文件
264
+ 2. 筛选与该 Skill 最相关的抽象规则
265
+ 3. 调用 LLM 生成 Skill 文档
266
+ 4. 在增量模式下只修订受影响的 section
267
+
268
+ ### Adapt:目标格式适配
269
+
270
+ 输入:
271
+
272
+ - `skills/*.md`
273
+
274
+ 输出:
275
+
276
+ - Cursor:复制到 `.cursor/rules/`
277
+ - Claude:合并为 `CLAUDE.md`
278
+ - Codex:合并为 `AGENTS.md`
279
+ - Copilot:合并为 `.github/copilot-instructions.md`
280
+ - Windsurf:合并为 `.windsurfrules`
281
+
282
+ ## 输出目录
283
+
284
+ ```text
285
+ .code2skill/
286
+   project-summary.md
287
+   skill-blueprint.json
288
+   skill-plan.json
289
+   report.json
290
+   references/
291
+     architecture.md
292
+     code-style.md
293
+     workflows.md
294
+     api-usage.md
295
+   skills/
296
+     index.md
297
+     *.md
298
+   state/
299
+     analysis-state.json
300
+ ```
301
+
302
+ ## CI / 增量使用建议
303
+
304
+ 推荐把 `.code2skill/` 当成 CI cache 或 artifact,而不是提交进仓库。
305
+
306
+ 增量模式依赖这些文件:
307
+
308
+ - `.code2skill/state/analysis-state.json`
309
+ - `.code2skill/skill-plan.json`
310
+ - 最好同时恢复 `.code2skill/skills/`
311
+
312
+ 如果这些文件缺失,或者 diff 条件不满足,`ci --mode auto` 会自动回退到全量模式。
313
+
314
+ ### 自动回退到全量的常见情况
315
+
316
+ - 没有历史状态
317
+ - 改动了核心配置文件,例如 `pyproject.toml`
318
+ - 改动文件数超过 `--max-incremental-changed-files`
319
+ - 当前目录不是 Git 仓库,且也没有提供 `--diff-file`
320
+
321
+ ### GitHub Actions 示例
322
+
323
+ ```yaml
324
+ name: code2skill
325
+
326
+ on:
327
+   pull_request:
328
+
329
+ jobs:
330
+   build-skills:
331
+     runs-on: ubuntu-latest
332
+
333
+     steps:
334
+       - name: Checkout
335
+         uses: actions/checkout@v4
336
+         with:
337
+           fetch-depth: 0
338
+
339
+       - name: Setup Python
340
+         uses: actions/setup-python@v5
341
+         with:
342
+           python-version: "3.11"
343
+
344
+       - name: Restore code2skill cache
345
+         uses: actions/cache@v4
346
+         with:
347
+           path: .code2skill
348
+           key: code2skill-${{ runner.os }}-${{ github.ref_name }}-${{ github.sha }}
349
+           restore-keys: |
350
+             code2skill-${{ runner.os }}-${{ github.ref_name }}-
351
+
352
+       - name: Install
353
+         run: pip install -e .[dev]
354
+
355
+       - name: Run code2skill
356
+         env:
357
+           QWEN_API_KEY: ${{ secrets.QWEN_API_KEY }}
358
+           CODE2SKILL_LLM: qwen
359
+           CODE2SKILL_MODEL: qwen-plus-latest
360
+         run: |
361
+           code2skill ci \
362
+             --mode auto \
363
+             --base-ref origin/${{ github.base_ref }} \
364
+             --head-ref HEAD
365
+
366
+       - name: Upload outputs
367
+         uses: actions/upload-artifact@v4
368
+         with:
369
+           name: code2skill-output
370
+           path: .code2skill
371
+ ```
372
+
373
+ 说明:
374
+
375
+ - `fetch-depth: 0` 很重要,否则基线提交可能不在本地历史里
376
+ - `restore-keys` 能让同一分支上的后续提交复用历史状态
377
+ - 第一次没有 cache 时,`ci --mode auto` 会自动走全量
378
+
379
+ ## 生成产物与 Git 管理
380
+
381
+ 默认情况下,仓库根目录下的这些目录已经在 `.gitignore` 中忽略:
382
+
383
+ - `.code2skill/`
384
+ - `.code2skill-*/`
385
+ - `.pypi-smoke/`
386
+
387
+ 建议:
388
+
389
+ - 正式产物统一写到 `.code2skill/`
390
+ - 本地试跑、真人验收、不同模型对比时,用 `.code2skill-qwen-live/`、`.code2skill-test/` 这类命名
391
+ - 不要把测试生成的 `skills/` 目录提交到 Git
392
+ - 如果要在 PR 中查看结果,优先用 artifact,而不是直接提交生成文件
393
+
394
+ ## 这个项目内部是怎么完成的
395
+
396
+ 如果你想理解 `code2skill` 自己是如何工作的,可以从这些模块开始:
397
+
398
+ - `src/code2skill/scanner/`:文件发现、过滤、预算裁剪、优先级评分
399
+ - `src/code2skill/extractors/python_extractor.py`:Python AST 骨架提取
400
+ - `src/code2skill/import_graph.py`:仓库内 import graph
401
+ - `src/code2skill/pattern_detector.py`:同角色文件模式检测
402
+ - `src/code2skill/analyzers/skill_blueprint_builder.py`:把扫描结果组装成 `SkillBlueprint`
403
+ - `src/code2skill/skill_planner.py`:生成 `skill-plan.json`
404
+ - `src/code2skill/skill_generator.py`:生成和增量修订 `skills/*.md`
405
+ - `src/code2skill/core.py`:统一编排 `scan / estimate / ci`
406
+
407
+ 推荐阅读顺序:
408
+
409
+ 1. `cli.py`
410
+ 2. `core.py`
411
+ 3. `scanner/` 与 `extractors/`
412
+ 4. `analyzers/`
413
+ 5. `skill_planner.py`
414
+ 6. `skill_generator.py`
415
+ 7. `adapt.py`
416
+
417
+ ## 发布检查清单
418
+
419
+ 开发与发布前推荐跑:
420
+
421
+ ```bash
422
+ pip install -e ".[dev]"
423
+ python -m pytest tests -q
424
+ python -m build
425
+ python -m twine check dist/code2skill-*.tar.gz dist/code2skill-*.whl
426
+ ```
427
+
428
+ ## 当前边界
429
+
430
+ - 目前只面向 Python 仓库
431
+ - 生成的 Skill 已适合辅助编码与审查,但不应被当作绝对事实
432
+ - 增量更新依赖历史状态文件与可用 diff
433
+ - `report.json` 中部分影响摘要仍带启发式成分,最终以 `skill-plan.json` 和生成出来的 `skills/*.md` 为准
434
+
435
+ ## English Quick Reference
436
+
437
+ ### What It Does
438
+
439
+ `code2skill` turns a Python repository into:
440
+
441
+ - a structural blueprint
442
+ - a skill plan
443
+ - generated skill markdown files
444
+ - cached state for incremental CI/CD runs
445
+
446
+ ### Quick Start
447
+
448
+ From the target repo root:
449
+
450
+ ```bash
451
+ export QWEN_API_KEY=...
452
+ export CODE2SKILL_LLM=qwen
453
+ export CODE2SKILL_MODEL=qwen-plus-latest
454
+ code2skill scan
455
+ ```
456
+
457
+ PowerShell:
458
+
459
+ ```powershell
460
+ $env:QWEN_API_KEY="..."
461
+ $env:CODE2SKILL_LLM="qwen"
462
+ $env:CODE2SKILL_MODEL="qwen-plus-latest"
463
+ code2skill scan
464
+ ```
465
+
466
+ ### Main Commands
467
+
468
+ ```bash
469
+ code2skill scan
470
+ code2skill scan --structure-only
471
+ code2skill ci --mode auto --base-ref origin/main
472
+ code2skill estimate
473
+ code2skill adapt --target codex --source-dir .code2skill/skills
474
+ ```
475
+
476
+ ### Incremental CI Requirements
477
+
478
+ Before running `ci`, restore these paths from a previous run's CI cache or artifact:
479
+
480
+ - `.code2skill/state/analysis-state.json`
481
+ - `.code2skill/skill-plan.json`
482
+ - preferably `.code2skill/skills/`
483
+
484
+ If they are missing, or the diff conditions are not met, `ci --mode auto` falls back to a full run.
485
+
486
+ ### Release Validation
487
+
488
+ ```bash
489
+ pip install -e ".[dev]"
490
+ python -m pytest tests -q
491
+ python -m build
492
+ python -m twine check dist/code2skill-*.tar.gz dist/code2skill-*.whl
493
+ ```
494
+
495
+ ## License
496
+
497
+ Apache-2.0