clip-hier 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ build/
5
+ dist/
6
+ .eggs/
7
+ .venv/
8
+ venv/
9
+ .pytest_cache/
10
+ .ruff_cache/
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 songtang209
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,85 @@
1
+ Metadata-Version: 2.4
2
+ Name: clip-hier
3
+ Version: 0.1.0
4
+ Summary: 图像标注 GT JSON(Picture_name/Tag/TagOption)读写工具集
5
+ Author: songtang209
6
+ License: MIT
7
+ License-File: LICENSE
8
+ Keywords: annotation,clip,ground-truth,image-classification,tagging
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3 :: Only
16
+ Classifier: Topic :: Scientific/Engineering :: Image Recognition
17
+ Requires-Python: >=3.8
18
+ Description-Content-Type: text/markdown
19
+
20
+ # clip-hier
21
+
22
+ 图像标注 **GT JSON**(`Picture_name` / `Tag` / `TagOption`)的轻量读写工具集,零第三方依赖。
23
+
24
+ 适用于这种标注格式:
25
+
26
+ ```json
27
+ { "Picture_name": "a.jpg", "Tag": "Birds", "TagOption": "OutdoorScenes|SunriseSunset" }
28
+ ```
29
+
30
+ - `Tag`:必选标签(`|` 分隔)。
31
+ - `TagOption`:疑似/可接受标签(`|` 分隔)。
32
+
33
+ ## 安装
34
+
35
+ ```bash
36
+ pip install clip-hier
37
+ ```
38
+
39
+ ## 用法
40
+
41
+ ### 读取目录
42
+
43
+ ```python
44
+ from clip_hier import load_gt_tags
45
+
46
+ gt = load_gt_tags("path/to/gt_json_dir")
47
+ # {"a.jpg": {"path": ".../a.json", "tag": {"Birds"}, "tag_option": {"OutdoorScenes", "SunriseSunset"}}}
48
+ ```
49
+
50
+ ### 把 TagOption 提升为 Tag(并写回文件)
51
+
52
+ 当某个标签命中一张图的 `TagOption` 且不在 `Tag` 时,把它移入 `Tag` 并改写源 JSON:
53
+
54
+ ```python
55
+ from clip_hier import GroundTruthTags
56
+
57
+ gt = GroundTruthTags("path/to/gt_json_dir")
58
+
59
+ # 推理得到某图 top1 预测后:
60
+ changed = gt.promote_if_option("a.jpg", "OutdoorScenes")
61
+ # 命中 TagOption -> 移入 Tag、写回磁盘,返回 True;否则 False
62
+ ```
63
+
64
+ 也可使用底层函数:
65
+
66
+ ```python
67
+ from clip_hier import load_gt_tags, promote_tag_option
68
+
69
+ gt = load_gt_tags("path/to/gt_json_dir")
70
+ promote_tag_option(gt["a.jpg"], "OutdoorScenes")
71
+ ```
72
+
73
+ ## API
74
+
75
+ | 名称 | 说明 |
76
+ | --- | --- |
77
+ | `load_gt_tags(json_dir)` | 读目录,返回 `{Picture_name: {"path", "tag", "tag_option"}}`(`tag`/`tag_option` 为 `set`) |
78
+ | `promote_tag_option(entry, name)` | 把 `name` 从 `entry` 的 TagOption 移到 Tag,写回 JSON,返回是否改动(无需改动或写盘失败均返回 `False`) |
79
+ | `GroundTruthTags(json_dir)` | 目录的封装:`get(name)`、`promote_if_option(name, tag)`、`in`、`len` |
80
+
81
+ 写回保留 JSON 中的其它字段;写盘成功后才同步内存,保证内存与磁盘一致。
82
+
83
+ ## License
84
+
85
+ MIT
@@ -0,0 +1,66 @@
1
+ # clip-hier
2
+
3
+ 图像标注 **GT JSON**(`Picture_name` / `Tag` / `TagOption`)的轻量读写工具集,零第三方依赖。
4
+
5
+ 适用于这种标注格式:
6
+
7
+ ```json
8
+ { "Picture_name": "a.jpg", "Tag": "Birds", "TagOption": "OutdoorScenes|SunriseSunset" }
9
+ ```
10
+
11
+ - `Tag`:必选标签(`|` 分隔)。
12
+ - `TagOption`:疑似/可接受标签(`|` 分隔)。
13
+
14
+ ## 安装
15
+
16
+ ```bash
17
+ pip install clip-hier
18
+ ```
19
+
20
+ ## 用法
21
+
22
+ ### 读取目录
23
+
24
+ ```python
25
+ from clip_hier import load_gt_tags
26
+
27
+ gt = load_gt_tags("path/to/gt_json_dir")
28
+ # {"a.jpg": {"path": ".../a.json", "tag": {"Birds"}, "tag_option": {"OutdoorScenes", "SunriseSunset"}}}
29
+ ```
30
+
31
+ ### 把 TagOption 提升为 Tag(并写回文件)
32
+
33
+ 当某个标签命中一张图的 `TagOption` 且不在 `Tag` 时,把它移入 `Tag` 并改写源 JSON:
34
+
35
+ ```python
36
+ from clip_hier import GroundTruthTags
37
+
38
+ gt = GroundTruthTags("path/to/gt_json_dir")
39
+
40
+ # 推理得到某图 top1 预测后:
41
+ changed = gt.promote_if_option("a.jpg", "OutdoorScenes")
42
+ # 命中 TagOption -> 移入 Tag、写回磁盘,返回 True;否则 False
43
+ ```
44
+
45
+ 也可使用底层函数:
46
+
47
+ ```python
48
+ from clip_hier import load_gt_tags, promote_tag_option
49
+
50
+ gt = load_gt_tags("path/to/gt_json_dir")
51
+ promote_tag_option(gt["a.jpg"], "OutdoorScenes")
52
+ ```
53
+
54
+ ## API
55
+
56
+ | 名称 | 说明 |
57
+ | --- | --- |
58
+ | `load_gt_tags(json_dir)` | 读目录,返回 `{Picture_name: {"path", "tag", "tag_option"}}`(`tag`/`tag_option` 为 `set`) |
59
+ | `promote_tag_option(entry, name)` | 把 `name` 从 `entry` 的 TagOption 移到 Tag,写回 JSON,返回是否改动(无需改动或写盘失败均返回 `False`) |
60
+ | `GroundTruthTags(json_dir)` | 目录的封装:`get(name)`、`promote_if_option(name, tag)`、`in`、`len` |
61
+
62
+ 写回保留 JSON 中的其它字段;写盘成功后才同步内存,保证内存与磁盘一致。
63
+
64
+ ## License
65
+
66
+ MIT
@@ -0,0 +1,29 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "clip-hier"
7
+ version = "0.1.0"
8
+ description = "图像标注 GT JSON(Picture_name/Tag/TagOption)读写工具集"
9
+ readme = "README.md"
10
+ requires-python = ">=3.8"
11
+ license = { text = "MIT" }
12
+ authors = [{ name = "songtang209" }]
13
+ keywords = ["ground-truth", "annotation", "tagging", "clip", "image-classification"]
14
+ classifiers = [
15
+ "Development Status :: 4 - Beta",
16
+ "Intended Audience :: Developers",
17
+ "Intended Audience :: Science/Research",
18
+ "License :: OSI Approved :: MIT License",
19
+ "Operating System :: OS Independent",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3 :: Only",
22
+ "Topic :: Scientific/Engineering :: Image Recognition",
23
+ ]
24
+
25
+ [tool.hatch.build.targets.wheel]
26
+ packages = ["src/clip_hier"]
27
+
28
+ [tool.hatch.build.targets.sdist]
29
+ include = ["src", "README.md", "LICENSE"]
@@ -0,0 +1,14 @@
1
"""clip-hier: read/write utilities for image-annotation GT JSON (Picture_name / Tag / TagOption)."""

from __future__ import annotations

# Re-export the public API from the implementation module so callers can
# write ``from clip_hier import ...`` directly.
from .gt import GroundTruthTags, load_gt_tags, promote_tag_option

__version__ = "0.1.0"

# Names exported by ``from clip_hier import *``.
__all__ = [
    "GroundTruthTags",
    "load_gt_tags",
    "promote_tag_option",
    "__version__",
]
@@ -0,0 +1,102 @@
1
+ """图像标注 GT JSON(Picture_name / Tag / TagOption)的读写工具。
2
+
3
+ 每个 GT JSON 形如:
4
+
5
+ {"Picture_name": "a.jpg", "Tag": "Birds", "TagOption": "OutdoorScenes|SunriseSunset"}
6
+
7
+ 其中 ``Tag`` / ``TagOption`` 均为 "|" 分隔的字符串,可缺省:
8
+
9
+ - ``Tag``:必选标签(参与召回评测)。
10
+ - ``TagOption``:疑似/可接受标签(精度免责)。
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ import os
17
+ from typing import Dict, Optional
18
+
19
+
20
def _split_tags(value: Optional[str]) -> list:
    """Split a ``|``-separated tag string into a list of clean tag names.

    Every piece is stripped of surrounding whitespace and empty pieces are
    dropped, so ``" A | | B "`` yields ``["A", "B"]``. Order and duplicates
    are preserved.

    Args:
        value: Raw ``Tag`` / ``TagOption`` field. ``None`` (a JSON ``null``)
            and the empty string both mean "no tags".

    Returns:
        The tag names in their original order; an empty list when there are
        none.
    """
    if not value:
        # A JSON ``null`` arrives here as ``None``; treat it like a missing
        # field instead of failing on ``None.split``.
        return []
    pieces = (piece.strip() for piece in value.split("|"))
    return [piece for piece in pieces if piece]
22
+
23
+
24
def load_gt_tags(json_dir: str) -> Dict[str, dict]:
    """Load every GT JSON file in ``json_dir`` into a dict keyed by picture name.

    Only regular files whose name ends with ``.json`` (case-insensitive) are
    read. A file is skipped when its top-level JSON value is not an object,
    or when ``Picture_name`` is missing, not a string, or blank.

    Args:
        json_dir: Directory containing the GT JSON files (not searched
            recursively).

    Returns:
        ``{Picture_name: {"path", "tag", "tag_option"}}`` where ``path`` is
        the JSON file the entry came from (kept so it can be written back
        later) and ``tag`` / ``tag_option`` are ``set`` objects of tag names.
        When several files declare the same ``Picture_name``, the file whose
        name sorts last wins.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be read.
        ValueError: If a ``.json`` file does not contain valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    gt: Dict[str, dict] = {}
    # ``os.listdir`` order is arbitrary; sorting makes the result (and the
    # winner among duplicate picture names) reproducible across runs.
    for fname in sorted(os.listdir(json_dir)):
        if not fname.lower().endswith(".json"):
            continue
        fpath = os.path.join(json_dir, fname)
        if not os.path.isfile(fpath):
            # e.g. a sub-directory that happens to be called "x.json".
            continue
        with open(fpath, "r", encoding="utf-8") as f:
            data = json.load(f)
        if not isinstance(data, dict):
            # Valid JSON, but not an annotation object (list, number, ...).
            continue
        raw_name = data.get("Picture_name")
        name = raw_name.strip() if isinstance(raw_name, str) else ""
        if not name:
            continue
        gt[name] = {
            "path": fpath,
            # ``or ""`` maps an explicit JSON ``null`` to "no tags".
            "tag": set(_split_tags(data.get("Tag") or "")),
            "tag_option": set(_split_tags(data.get("TagOption") or "")),
        }
    return gt
46
+
47
+
48
def promote_tag_option(entry: dict, name: str) -> bool:
    """Move ``name`` from an entry's TagOption to its Tag and rewrite the JSON file.

    Nothing happens unless ``name`` is currently in ``entry["tag_option"]``
    and not in ``entry["tag"]``. The file at ``entry["path"]`` is re-read,
    updated and replaced atomically; all other fields in the JSON are kept.
    The in-memory sets are only updated after the file has been replaced, so
    memory and disk stay consistent.

    Args:
        entry: One value from ``load_gt_tags``: a dict with ``"path"``,
            ``"tag"`` (set) and ``"tag_option"`` (set).
        name: The tag to promote.

    Returns:
        ``True`` if the file and the entry were changed. ``False`` if no
        change was needed or if reading/writing the file failed (in which
        case the original file and the entry are left untouched).

    Raises:
        KeyError: If ``entry`` lacks the ``"tag"`` or ``"tag_option"`` key.
    """
    if name in entry["tag"] or name not in entry["tag_option"]:
        return False

    import tempfile  # local import: only needed on the write path

    tmp_path = None
    try:
        # Resolve symlinks so the replace below updates the real file rather
        # than swapping the link for a regular file.
        path = os.path.realpath(entry["path"])
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
        # ``or ""`` maps an explicit JSON ``null`` to "no tags".
        tags = _split_tags(data.get("Tag") or "")
        opts = _split_tags(data.get("TagOption") or "")
        if name not in tags:
            tags.append(name)
        data["Tag"] = "|".join(tags)
        data["TagOption"] = "|".join(t for t in opts if t != name)

        # Write to a sibling temp file and swap it in, so a failure midway
        # (disk full, crash, ...) can never leave a truncated JSON behind.
        fd, tmp_path = tempfile.mkstemp(
            dir=os.path.dirname(path), prefix=".", suffix=".tmp"
        )
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        # mkstemp creates the file as 0600; keep the original permission bits.
        os.chmod(tmp_path, os.stat(path).st_mode & 0o7777)
        os.replace(tmp_path, path)
    except Exception:
        # Documented best-effort contract: any failure is reported as
        # "not changed" rather than raised.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
        return False
    entry["tag_option"].discard(name)
    entry["tag"].add(name)
    return True
75
+
76
+
77
class GroundTruthTags:
    """A loaded GT JSON directory that can promote TagOption labels to Tag.

    ``entries`` maps each ``Picture_name`` to the entry dict produced by
    ``load_gt_tags`` for ``json_dir``.
    """

    def __init__(self, json_dir: str):
        """Remember ``json_dir`` and load all of its GT entries."""
        self.json_dir = json_dir
        self.entries = load_gt_tags(json_dir)

    def __len__(self) -> int:
        """Return the number of loaded pictures."""
        return len(self.entries)

    def __contains__(self, picture_name: str) -> bool:
        """Return whether an entry exists for ``picture_name``."""
        return picture_name in self.entries

    def get(self, picture_name: str) -> Optional[dict]:
        """Return the entry for ``picture_name``, or ``None`` if it is unknown."""
        return self.entries.get(picture_name)

    def promote_if_option(self, picture_name: str, tag: str) -> bool:
        """Promote ``tag`` from the picture's TagOption to Tag and rewrite its JSON.

        Returns ``False`` when the picture is unknown; otherwise returns
        whatever ``promote_tag_option`` reports for that entry.
        """
        found = self.entries.get(picture_name)
        return False if found is None else promote_tag_option(found, tag)