clip-hier 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clip_hier-0.1.0/.gitignore +10 -0
- clip_hier-0.1.0/LICENSE +21 -0
- clip_hier-0.1.0/PKG-INFO +85 -0
- clip_hier-0.1.0/README.md +66 -0
- clip_hier-0.1.0/pyproject.toml +29 -0
- clip_hier-0.1.0/src/clip_hier/__init__.py +14 -0
- clip_hier-0.1.0/src/clip_hier/gt.py +102 -0
clip_hier-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 songtang209
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
clip_hier-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: clip-hier
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: 图像标注 GT JSON(Picture_name/Tag/TagOption)读写工具集
|
|
5
|
+
Author: songtang209
|
|
6
|
+
License: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Keywords: annotation,clip,ground-truth,image-classification,tagging
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Intended Audience :: Science/Research
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering :: Image Recognition
|
|
17
|
+
Requires-Python: >=3.8
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
|
|
20
|
+
# clip-hier
|
|
21
|
+
|
|
22
|
+
图像标注 **GT JSON**(`Picture_name` / `Tag` / `TagOption`)的轻量读写工具集,零第三方依赖。
|
|
23
|
+
|
|
24
|
+
适用于这种标注格式:
|
|
25
|
+
|
|
26
|
+
```json
|
|
27
|
+
{ "Picture_name": "a.jpg", "Tag": "Birds", "TagOption": "OutdoorScenes|SunriseSunset" }
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
- `Tag`:必选标签(`|` 分隔)。
|
|
31
|
+
- `TagOption`:疑似/可接受标签(`|` 分隔)。
|
|
32
|
+
|
|
33
|
+
## 安装
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
pip install clip-hier
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## 用法
|
|
40
|
+
|
|
41
|
+
### 读取目录
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
from clip_hier import load_gt_tags
|
|
45
|
+
|
|
46
|
+
gt = load_gt_tags("path/to/gt_json_dir")
|
|
47
|
+
# {"a.jpg": {"path": ".../a.json", "tag": {"Birds"}, "tag_option": {"OutdoorScenes", "SunriseSunset"}}}
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### 把 TagOption 提升为 Tag(并写回文件)
|
|
51
|
+
|
|
52
|
+
当某个标签命中一张图的 `TagOption` 且不在 `Tag` 时,把它移入 `Tag` 并改写源 JSON:
|
|
53
|
+
|
|
54
|
+
```python
|
|
55
|
+
from clip_hier import GroundTruthTags
|
|
56
|
+
|
|
57
|
+
gt = GroundTruthTags("path/to/gt_json_dir")
|
|
58
|
+
|
|
59
|
+
# 推理得到某图 top1 预测后:
|
|
60
|
+
changed = gt.promote_if_option("a.jpg", "OutdoorScenes")
|
|
61
|
+
# 命中 TagOption -> 移入 Tag、写回磁盘,返回 True;否则 False
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
也可使用底层函数:
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
from clip_hier import load_gt_tags, promote_tag_option
|
|
68
|
+
|
|
69
|
+
gt = load_gt_tags("path/to/gt_json_dir")
|
|
70
|
+
promote_tag_option(gt["a.jpg"], "OutdoorScenes")
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## API
|
|
74
|
+
|
|
75
|
+
| 名称 | 说明 |
|
|
76
|
+
| --- | --- |
|
|
77
|
+
| `load_gt_tags(json_dir)` | 读目录,返回 `{Picture_name: {"path", "tag", "tag_option"}}`(`tag`/`tag_option` 为 `set`) |
|
|
78
|
+
| `promote_tag_option(entry, name)` | 把 `name` 从 `entry` 的 TagOption 移到 Tag,写回 JSON,返回是否改动 |
|
|
79
|
+
| `GroundTruthTags(json_dir)` | 目录的封装:`get(name)`、`promote_if_option(name, tag)`、`in`、`len` |
|
|
80
|
+
|
|
81
|
+
写回保留 JSON 中的其它字段;写盘成功后才同步内存,保证内存与磁盘一致。
|
|
82
|
+
|
|
83
|
+
## License
|
|
84
|
+
|
|
85
|
+
MIT
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# clip-hier
|
|
2
|
+
|
|
3
|
+
图像标注 **GT JSON**(`Picture_name` / `Tag` / `TagOption`)的轻量读写工具集,零第三方依赖。
|
|
4
|
+
|
|
5
|
+
适用于这种标注格式:
|
|
6
|
+
|
|
7
|
+
```json
|
|
8
|
+
{ "Picture_name": "a.jpg", "Tag": "Birds", "TagOption": "OutdoorScenes|SunriseSunset" }
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
- `Tag`:必选标签(`|` 分隔)。
|
|
12
|
+
- `TagOption`:疑似/可接受标签(`|` 分隔)。
|
|
13
|
+
|
|
14
|
+
## 安装
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
pip install clip-hier
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## 用法
|
|
21
|
+
|
|
22
|
+
### 读取目录
|
|
23
|
+
|
|
24
|
+
```python
|
|
25
|
+
from clip_hier import load_gt_tags
|
|
26
|
+
|
|
27
|
+
gt = load_gt_tags("path/to/gt_json_dir")
|
|
28
|
+
# {"a.jpg": {"path": ".../a.json", "tag": {"Birds"}, "tag_option": {"OutdoorScenes", "SunriseSunset"}}}
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
### 把 TagOption 提升为 Tag(并写回文件)
|
|
32
|
+
|
|
33
|
+
当某个标签命中一张图的 `TagOption` 且不在 `Tag` 时,把它移入 `Tag` 并改写源 JSON:
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
from clip_hier import GroundTruthTags
|
|
37
|
+
|
|
38
|
+
gt = GroundTruthTags("path/to/gt_json_dir")
|
|
39
|
+
|
|
40
|
+
# 推理得到某图 top1 预测后:
|
|
41
|
+
changed = gt.promote_if_option("a.jpg", "OutdoorScenes")
|
|
42
|
+
# 命中 TagOption -> 移入 Tag、写回磁盘,返回 True;否则 False
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
也可使用底层函数:
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
from clip_hier import load_gt_tags, promote_tag_option
|
|
49
|
+
|
|
50
|
+
gt = load_gt_tags("path/to/gt_json_dir")
|
|
51
|
+
promote_tag_option(gt["a.jpg"], "OutdoorScenes")
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## API
|
|
55
|
+
|
|
56
|
+
| 名称 | 说明 |
|
|
57
|
+
| --- | --- |
|
|
58
|
+
| `load_gt_tags(json_dir)` | 读目录,返回 `{Picture_name: {"path", "tag", "tag_option"}}`(`tag`/`tag_option` 为 `set`) |
|
|
59
|
+
| `promote_tag_option(entry, name)` | 把 `name` 从 `entry` 的 TagOption 移到 Tag,写回 JSON,返回是否改动 |
|
|
60
|
+
| `GroundTruthTags(json_dir)` | 目录的封装:`get(name)`、`promote_if_option(name, tag)`、`in`、`len` |
|
|
61
|
+
|
|
62
|
+
写回保留 JSON 中的其它字段;写盘成功后才同步内存,保证内存与磁盘一致。
|
|
63
|
+
|
|
64
|
+
## License
|
|
65
|
+
|
|
66
|
+
MIT
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "clip-hier"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "图像标注 GT JSON(Picture_name/Tag/TagOption)读写工具集"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.8"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "songtang209" }]
|
|
13
|
+
keywords = ["ground-truth", "annotation", "tagging", "clip", "image-classification"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"Intended Audience :: Science/Research",
|
|
18
|
+
"License :: OSI Approved :: MIT License",
|
|
19
|
+
"Operating System :: OS Independent",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
22
|
+
"Topic :: Scientific/Engineering :: Image Recognition",
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
[tool.hatch.build.targets.wheel]
|
|
26
|
+
packages = ["src/clip_hier"]
|
|
27
|
+
|
|
28
|
+
[tool.hatch.build.targets.sdist]
|
|
29
|
+
include = ["src", "README.md", "LICENSE"]
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""clip-hier:图像标注 GT JSON(Picture_name / Tag / TagOption)读写工具集。"""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from .gt import GroundTruthTags, load_gt_tags, promote_tag_option
|
|
6
|
+
|
|
7
|
+
__version__ = "0.1.0"
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"GroundTruthTags",
|
|
11
|
+
"load_gt_tags",
|
|
12
|
+
"promote_tag_option",
|
|
13
|
+
"__version__",
|
|
14
|
+
]
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"""图像标注 GT JSON(Picture_name / Tag / TagOption)的读写工具。
|
|
2
|
+
|
|
3
|
+
每个 GT JSON 形如:
|
|
4
|
+
|
|
5
|
+
{"Picture_name": "a.jpg", "Tag": "Birds", "TagOption": "OutdoorScenes|SunriseSunset"}
|
|
6
|
+
|
|
7
|
+
其中 ``Tag`` / ``TagOption`` 均为 "|" 分隔的字符串,可缺省:
|
|
8
|
+
|
|
9
|
+
- ``Tag``:必选标签(参与召回评测)。
|
|
10
|
+
- ``TagOption``:疑似/可接受标签(精度免责)。
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
import os
|
|
17
|
+
from typing import Dict, Optional
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _split_tags(value: str) -> list:
|
|
21
|
+
return [t.strip() for t in value.split("|") if t.strip()]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def load_gt_tags(json_dir: str) -> Dict[str, dict]:
    """Load a directory of GT JSON files into ``{Picture_name: entry}``.

    Every entry is a dict with three keys:

    - ``"path"``: path of the JSON file the entry was read from (used later
      to write changes back),
    - ``"tag"``: ``set`` of names parsed from the ``Tag`` field,
    - ``"tag_option"``: ``set`` of names parsed from the ``TagOption`` field.

    The following directory entries are skipped instead of aborting the load:
    names that do not end in ``.json`` (case-insensitive), anything that is
    not a regular file, JSON documents whose top level is not an object, and
    documents whose ``Picture_name`` is missing, not a string, or blank.

    Files are visited in sorted name order, so when several files declare the
    same ``Picture_name`` the one that sorts last wins deterministically.

    Args:
        json_dir: Directory containing the GT JSON files (not recursed into).

    Returns:
        Mapping from stripped ``Picture_name`` to its entry dict.

    Raises:
        OSError: If the directory or one of its JSON files cannot be read.
        ValueError: If a ``.json`` file is not valid UTF-8 JSON
            (``json.JSONDecodeError`` / ``UnicodeDecodeError``).
    """
    gt: Dict[str, dict] = {}
    # os.listdir order is arbitrary; sort so duplicate names resolve stably.
    for fname in sorted(os.listdir(json_dir)):
        if not fname.lower().endswith(".json"):
            continue
        fpath = os.path.join(json_dir, fname)
        if not os.path.isfile(fpath):
            continue
        with open(fpath, "r", encoding="utf-8") as f:
            data = json.load(f)
        # Only JSON objects can carry the Picture_name/Tag/TagOption fields.
        if not isinstance(data, dict):
            continue
        raw_name = data.get("Picture_name")
        if not isinstance(raw_name, str):
            continue
        name = raw_name.strip()
        if not name:
            continue
        gt[name] = {
            "path": fpath,
            # ``or ""`` maps an explicit JSON null to "no tags".
            "tag": set(_split_tags(data.get("Tag") or "")),
            "tag_option": set(_split_tags(data.get("TagOption") or "")),
        }
    return gt
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def promote_tag_option(entry: dict, name: str) -> bool:
    """Move ``name`` from an entry's TagOption to its Tag and persist it.

    Nothing happens unless ``name`` is currently in ``entry["tag_option"]``
    and not in ``entry["tag"]``.  Otherwise the JSON file at
    ``entry["path"]`` is re-read, ``name`` is appended to its ``Tag`` field
    (if absent) and removed from ``TagOption``, and the file is rewritten
    with every other field preserved.

    The new content is written to a temporary file in the same directory and
    then swapped in with ``os.replace``, so a failed or interrupted write
    leaves the original file intact rather than truncated.  The in-memory
    sets in ``entry`` are updated only after the file has been replaced, so
    memory and disk stay consistent.

    Args:
        entry: An entry dict as produced by ``load_gt_tags`` (keys ``"path"``,
            ``"tag"``, ``"tag_option"``).
        name: The tag name to promote.

    Returns:
        ``True`` if the file and ``entry`` were changed; ``False`` if no
        change was needed or if reading/parsing/writing the file failed.

    Raises:
        KeyError: If ``entry`` lacks one of the required keys.
    """
    if name in entry["tag"] or name not in entry["tag_option"]:
        return False

    import tempfile  # local import: only needed on the write path

    path = entry["path"]
    tmp_path = None
    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
        # ``or ""`` maps an explicit JSON null to "no tags".
        tags = _split_tags(data.get("Tag") or "")
        opts = _split_tags(data.get("TagOption") or "")
        if name not in tags:
            tags.append(name)
        opts = [t for t in opts if t != name]
        data["Tag"] = "|".join(tags)
        data["TagOption"] = "|".join(opts)
        # Serialize before touching the disk so encoding problems cannot
        # leave a half-written file behind.
        payload = json.dumps(data, ensure_ascii=False, indent=2)

        # Resolve symlinks so the real file is replaced, not the link itself.
        target = os.path.realpath(path)
        fd, tmp_path = tempfile.mkstemp(
            dir=os.path.dirname(target) or ".",
            prefix=os.path.basename(target) + ".",
            suffix=".tmp",  # not ".json", so a stray temp file is never loaded
        )
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            f.write(payload)
        try:
            # mkstemp creates the file owner-only; keep the original's mode.
            os.chmod(tmp_path, os.stat(target).st_mode & 0o7777)
        except OSError:
            pass
        os.replace(tmp_path, target)
        tmp_path = None  # ownership handed over; nothing left to clean up
    except (OSError, ValueError, TypeError, AttributeError):
        # I/O failure, invalid JSON/encoding, or an unexpected JSON shape:
        # report "not changed" and leave ``entry`` untouched.
        return False
    finally:
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                pass

    entry["tag_option"].discard(name)
    entry["tag"].add(name)
    return True
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class GroundTruthTags:
    """In-memory view of a GT JSON directory that can promote TagOption to Tag.

    ``entries`` maps each ``Picture_name`` to the entry dict produced by
    ``load_gt_tags``; ``json_dir`` remembers the directory it was loaded from.
    """

    def __init__(self, json_dir: str):
        # Load first: if the directory cannot be read, construction fails
        # before any attribute is set.
        loaded = load_gt_tags(json_dir)
        self.json_dir = json_dir
        self.entries = loaded

    def __len__(self) -> int:
        """Return the number of loaded pictures."""
        mapping = self.entries
        return len(mapping)

    def __contains__(self, picture_name: str) -> bool:
        """Return whether an entry exists for ``picture_name``."""
        mapping = self.entries
        return picture_name in mapping

    def get(self, picture_name: str) -> Optional[dict]:
        """Return the entry for ``picture_name``, or ``None`` if absent."""
        return self.entries.get(picture_name, None)

    def promote_if_option(self, picture_name: str, tag: str) -> bool:
        """Promote ``tag`` to Tag when it is only a TagOption of the picture.

        Delegates to ``promote_tag_option`` for the picture's entry, which
        also writes the change back to the JSON file.  Returns ``False`` when
        the picture is unknown or nothing was changed.
        """
        found = self.entries.get(picture_name)
        if found is not None:
            return promote_tag_option(found, tag)
        return False
|