annofabcli 1.98.0__py3-none-any.whl → 1.99.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- annofabcli/__version__.py +1 -1
- annofabcli/annotation_specs/put_label_color.py +5 -0
- annofabcli/comment/delete_comment.py +3 -0
- annofabcli/comment/put_inspection_comment.py +3 -0
- annofabcli/comment/put_onhold_comment.py +3 -0
- annofabcli/input_data/change_input_data_name.py +4 -1
- annofabcli/input_data/put_input_data.py +4 -0
- annofabcli/statistics/list_annotation_attribute_filled_count.py +684 -0
- annofabcli/statistics/list_annotation_count.py +80 -37
- annofabcli/statistics/list_annotation_duration.py +18 -18
- annofabcli/statistics/subcommand_statistics.py +3 -0
- annofabcli/task/list_tasks_added_task_history.py +141 -156
- annofabcli/task/put_tasks.py +9 -4
- {annofabcli-1.98.0.dist-info → annofabcli-1.99.0.dist-info}/METADATA +1 -1
- {annofabcli-1.98.0.dist-info → annofabcli-1.99.0.dist-info}/RECORD +18 -17
- {annofabcli-1.98.0.dist-info → annofabcli-1.99.0.dist-info}/LICENSE +0 -0
- {annofabcli-1.98.0.dist-info → annofabcli-1.99.0.dist-info}/WHEEL +0 -0
- {annofabcli-1.98.0.dist-info → annofabcli-1.99.0.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,684 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import collections
|
|
5
|
+
import json
|
|
6
|
+
import logging
|
|
7
|
+
import sys
|
|
8
|
+
import tempfile
|
|
9
|
+
import zipfile
|
|
10
|
+
from collections import defaultdict
|
|
11
|
+
from collections.abc import Collection, Iterator
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from enum import Enum
|
|
14
|
+
from functools import partial
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import Any, Literal, Optional, Protocol, Union
|
|
17
|
+
|
|
18
|
+
import annofabapi
|
|
19
|
+
import pandas
|
|
20
|
+
from annofabapi.models import ProjectMemberRole
|
|
21
|
+
from annofabapi.parser import (
|
|
22
|
+
SimpleAnnotationParser,
|
|
23
|
+
lazy_parse_simple_annotation_dir,
|
|
24
|
+
lazy_parse_simple_annotation_zip,
|
|
25
|
+
)
|
|
26
|
+
from annofabapi.pydantic_models.additional_data_definition_type import AdditionalDataDefinitionType
|
|
27
|
+
from annofabapi.pydantic_models.task_phase import TaskPhase
|
|
28
|
+
from annofabapi.pydantic_models.task_status import TaskStatus
|
|
29
|
+
from dataclasses_json import DataClassJsonMixin, config
|
|
30
|
+
|
|
31
|
+
import annofabcli
|
|
32
|
+
import annofabcli.common.cli
|
|
33
|
+
from annofabcli.common.cli import (
|
|
34
|
+
COMMAND_LINE_ERROR_STATUS_CODE,
|
|
35
|
+
ArgumentParser,
|
|
36
|
+
CommandLine,
|
|
37
|
+
build_annofabapi_resource_and_login,
|
|
38
|
+
)
|
|
39
|
+
from annofabcli.common.download import DownloadingFile
|
|
40
|
+
from annofabcli.common.enums import FormatArgument
|
|
41
|
+
from annofabcli.common.facade import (
|
|
42
|
+
AnnofabApiFacade,
|
|
43
|
+
TaskQuery,
|
|
44
|
+
match_annotation_with_task_query,
|
|
45
|
+
)
|
|
46
|
+
from annofabcli.common.type_util import assert_noreturn
|
|
47
|
+
from annofabcli.common.utils import print_csv, print_json
|
|
48
|
+
from annofabcli.statistics.list_annotation_count import AnnotationSpecs
|
|
49
|
+
|
|
50
|
+
logger = logging.getLogger(__name__)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# Classification of an attribute value: "filled" when a value has been entered, "empty" otherwise.
AttributeValueType = Literal["filled", "empty"]
# Key of one counter: (label_name_en, attribute_name_en, "filled" | "empty").
AttributeValueKey = tuple[str, str, AttributeValueType]
"""
属性のキー.
tuple[label_name_en, attribute_name_en, filled | empty] で表す。
"""


# Key of one attribute: (label_name_en, attribute_name_en).
AttributeNameKey = tuple[str, str]
"""
属性名のキー.
tuple[label_name_en, attribute_name_en] で表す。
"""


# Generic "collection of key collections" alias.
# NOTE(review): not referenced by any definition visible in this module — confirm whether it is still needed.
AttributeKeys = Collection[Collection]
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class HasAnnotationAttributeCounts(Protocol):
    """Structural type for any object that exposes per-attribute annotation counts."""

    # Annotation counts keyed by (label name, attribute name, "filled" | "empty").
    annotation_attribute_counts: dict[AttributeValueKey, int]
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class GroupBy(Enum):
    """Unit by which annotation counts are aggregated (values of the ``--group_by`` option)."""

    # One output row per task.
    TASK_ID = "task_id"
    # One output row per input data item.
    INPUT_DATA_ID = "input_data_id"
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def encode_annotation_count_by_attribute(
    annotation_count_by_attribute: dict[AttributeValueKey, int],
) -> dict[str, dict[str, dict[str, int]]]:
    """Convert flat per-attribute counts into a nested dict suitable for JSON output.

    Args:
        annotation_count_by_attribute: Annotation counts keyed by
            ``(label_name, attribute_name, attribute_value_type)``.

    Returns:
        ``{label_name: {attribute_name: {attribute_value_type: annotation_count}}}``.
        The result is built from plain ``dict`` objects, so looking up a missing
        key raises ``KeyError`` instead of silently inserting an empty entry.
    """
    result: dict[str, dict[str, dict[str, int]]] = {}
    for (label_name, attribute_name, attribute_value_type), annotation_count in annotation_count_by_attribute.items():
        # setdefault creates each nesting level only when a real count is stored under it.
        result.setdefault(label_name, {}).setdefault(attribute_name, {})[attribute_value_type] = annotation_count
    return result
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@dataclass(frozen=True)
class AnnotationCountByInputData(DataClassJsonMixin, HasAnnotationAttributeCounts):
    """Annotation-count information for a single input data item."""

    task_id: str
    task_status: TaskStatus
    task_phase: TaskPhase
    task_phase_stage: int

    input_data_id: str
    input_data_name: str

    # Number of annotations per attribute key.
    # key: (label name (English), attribute name (English), kind of attribute value), value: annotation count.
    # The encoder turns the tuple-keyed dict into nested dicts when the object is serialized.
    annotation_attribute_counts: dict[AttributeValueKey, int] = field(metadata=config(encoder=encode_annotation_count_by_attribute))

    # Frame number (1-based). Optional because the annotation JSON does not contain this information.
    frame_no: Optional[int] = None
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
@dataclass(frozen=True)
class AnnotationCountByTask(DataClassJsonMixin, HasAnnotationAttributeCounts):
    """Annotation-count information for a single task."""

    task_id: str
    task_status: TaskStatus
    task_phase: TaskPhase
    task_phase_stage: int
    input_data_count: int

    # Number of annotations per attribute key.
    # key: (label name (English), attribute name (English), kind of attribute value), value: annotation count.
    # The encoder turns the tuple-keyed dict into nested dicts when the object is serialized.
    annotation_attribute_counts: dict[AttributeValueKey, int] = field(metadata=config(encoder=encode_annotation_count_by_attribute))
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def lazy_parse_simple_annotation_by_input_data(annotation_path: Path) -> Iterator[SimpleAnnotationParser]:
    """Return a lazy iterator of annotation parsers for a directory or a zip file.

    Args:
        annotation_path: Path to an annotation zip file or to a directory.

    Raises:
        RuntimeError: If the path does not exist, or is neither a directory nor a zip file.
    """
    if not annotation_path.exists():
        raise RuntimeError(f"'{annotation_path}' は存在しません。")

    # A directory takes precedence over the zip check.
    if annotation_path.is_dir():
        return lazy_parse_simple_annotation_dir(annotation_path)

    if not zipfile.is_zipfile(str(annotation_path)):
        raise RuntimeError(f"'{annotation_path}'は、zipファイルまたはディレクトリではありません。")

    return lazy_parse_simple_annotation_zip(annotation_path)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def convert_annotation_count_list_by_input_data_to_by_task(annotation_count_list: list[AnnotationCountByInputData]) -> list[AnnotationCountByTask]:
    """Aggregate per-input-data annotation counts into per-task annotation counts.

    Tasks appear in the result in the order their ``task_id`` is first seen.
    The task status, phase and phase stage are taken from the first input data
    item of each task.
    """
    # Group the input-data records by the task they belong to.
    grouped: dict[str, list[AnnotationCountByInputData]] = collections.defaultdict(list)
    for item in annotation_count_list:
        grouped[item.task_id].append(item)

    def _summarize(task_id: str, members: list[AnnotationCountByInputData]) -> AnnotationCountByTask:
        """Sum the attribute counts of every input data item of one task."""
        totals: dict[AttributeValueKey, int] = defaultdict(int)
        for member in members:
            for attribute_key, count in member.annotation_attribute_counts.items():
                totals[attribute_key] += count

        head = members[0]
        return AnnotationCountByTask(
            task_id=task_id,
            task_status=head.task_status,
            task_phase=head.task_phase,
            task_phase_stage=head.task_phase_stage,
            input_data_count=len(members),
            annotation_attribute_counts=totals,
        )

    return [_summarize(task_id, members) for task_id, members in grouped.items()]
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
class ListAnnotationCounterByInputData:
    """Count, per input data item, how many attribute values are filled in or empty.

    Args:
        target_labels: Labels (label_name_en) to aggregate. ``None`` means no restriction.
        non_target_labels: Labels excluded from aggregation.
        target_attribute_names: Attribute name keys ``(label, attribute name)`` to aggregate.
            ``None`` means no restriction.
        non_target_attribute_names: Attribute name keys excluded from aggregation.
        frame_no_map: Mapping ``(task_id, input_data_id) -> frame number`` used to fill ``frame_no``.
    """

    def __init__(
        self,
        *,
        target_labels: Optional[Collection[str]] = None,
        non_target_labels: Optional[Collection[str]] = None,
        target_attribute_names: Optional[Collection[AttributeNameKey]] = None,
        non_target_attribute_names: Optional[Collection[AttributeNameKey]] = None,
        frame_no_map: Optional[dict[tuple[str, str], int]] = None,
    ) -> None:
        def _as_set(values: Optional[Collection[Any]]) -> Optional[set[Any]]:
            """Copy the filter into a set for fast membership tests; keep ``None`` as "no filter"."""
            return None if values is None else set(values)

        self.target_labels = _as_set(target_labels)
        self.target_attribute_names = _as_set(target_attribute_names)
        self.non_target_labels = _as_set(non_target_labels)
        self.non_target_attribute_names = _as_set(non_target_attribute_names)
        self.frame_no_map = frame_no_map

    @staticmethod
    def _attribute_value_type(value: Optional[Union[bool, str, float]]) -> AttributeValueType:  # noqa: FBT001
        """Classify an attribute value from the annotation JSON.

        ``None`` and the empty string are "empty"; every other value is "filled".
        """
        is_empty = value is None or (isinstance(value, str) and value == "")
        return "empty" if is_empty else "filled"

    def _is_label_counted(self, label: str) -> bool:
        """Return True when the label passes both the include and the exclude filter."""
        if self.target_labels is not None and label not in self.target_labels:
            return False
        return self.non_target_labels is None or label not in self.non_target_labels

    def _is_attribute_counted(self, attribute_name_key: AttributeNameKey) -> bool:
        """Return True when ``(label, attribute name)`` passes both the include and the exclude filter."""
        if self.target_attribute_names is not None and attribute_name_key not in self.target_attribute_names:
            return False
        return self.non_target_attribute_names is None or attribute_name_key not in self.non_target_attribute_names

    def get_annotation_count(self, simple_annotation: dict[str, Any]) -> AnnotationCountByInputData:
        """Count the filled/empty attribute values of one annotation JSON.

        Args:
            simple_annotation: Content of one annotation JSON file.
        """
        counts: dict[AttributeValueKey, int] = defaultdict(int)
        for detail in simple_annotation["details"]:
            label = detail["label"]
            if not self._is_label_counted(label):
                continue

            for attribute_name, attribute_value in detail["attributes"].items():
                if not self._is_attribute_counted((label, attribute_name)):
                    continue
                counts[(label, attribute_name, self._attribute_value_type(attribute_value))] += 1

        # The frame number is only known when a (task_id, input_data_id) map was supplied.
        frame_no: Optional[int] = (
            None if self.frame_no_map is None else self.frame_no_map.get((simple_annotation["task_id"], simple_annotation["input_data_id"]))
        )

        return AnnotationCountByInputData(
            task_id=simple_annotation["task_id"],
            task_phase=TaskPhase(simple_annotation["task_phase"]),
            task_phase_stage=simple_annotation["task_phase_stage"],
            task_status=TaskStatus(simple_annotation["task_status"]),
            input_data_id=simple_annotation["input_data_id"],
            input_data_name=simple_annotation["input_data_name"],
            annotation_attribute_counts=counts,
            frame_no=frame_no,
        )

    def get_annotation_count_list(
        self,
        annotation_path: Path,
        *,
        target_task_ids: Optional[Collection[str]] = None,
        task_query: Optional[TaskQuery] = None,
    ) -> list[AnnotationCountByInputData]:
        """Collect the attribute counts of every annotation JSON in a zip file or directory.

        Args:
            annotation_path: Annotation zip file, or the directory it was extracted to.
            target_task_ids: When given, only JSON files of these tasks are read.
            task_query: When given, only JSON files matching this query are counted.
        """
        task_id_filter = None if target_task_ids is None else set(target_task_ids)
        parsers = lazy_parse_simple_annotation_by_input_data(annotation_path)

        logger.debug("アノテーションzip/ディレクトリを読み込み中")
        result: list[AnnotationCountByInputData] = []
        for ordinal, parser in enumerate(parsers, start=1):
            if ordinal % 1000 == 0:
                logger.debug(f"{ordinal} 件目のJSONを読み込み中")

            # Check the task id before loading the JSON so that skipped files are never parsed.
            if task_id_filter is not None and parser.task_id not in task_id_filter:
                continue

            simple_annotation_dict = parser.load_json()
            if task_query is not None and not match_annotation_with_task_query(simple_annotation_dict, task_query):
                continue

            result.append(self.get_annotation_count(simple_annotation_dict))

        return result
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
class AnnotationCountCsvByAttribute:
    """Build the DataFrames used to write per-attribute annotation counts as CSV.

    Columns are 3-level tuples: the leading columns identify the task or input
    data item (``(name, "", "")``) and the remaining columns are attribute keys
    ``(label name, attribute name, "filled" | "empty")``.

    Args:
        selective_attribute_value_max_count: Stored on the instance only; none of
            the methods of this class read it. Kept so existing callers keep working.
    """

    # Leading (non-count) columns of the per-input-data table.
    _BASIC_COLUMNS_BY_INPUT_DATA = (
        ("task_id", "", ""),
        ("task_status", "", ""),
        ("task_phase", "", ""),
        ("task_phase_stage", "", ""),
        ("input_data_id", "", ""),
        ("input_data_name", "", ""),
        ("frame_no", "", ""),
    )

    # Leading (non-count) columns of the per-task table.
    _BASIC_COLUMNS_BY_TASK = (
        ("task_id", "", ""),
        ("task_status", "", ""),
        ("task_phase", "", ""),
        ("task_phase_stage", "", ""),
        ("input_data_count", "", ""),
    )

    def __init__(self, selective_attribute_value_max_count: int = 20) -> None:
        self.selective_attribute_value_max_count = selective_attribute_value_max_count

    def _value_columns(
        self, annotation_count_list: Collection[HasAnnotationAttributeCounts], *, prior_attribute_columns: Optional[list[tuple[str, str, str]]]
    ) -> list[tuple[str, str, str]]:
        """Return the count columns of the CSV.

        ``prior_attribute_columns`` come first in the given order, followed by the
        remaining attribute keys found in ``annotation_count_list`` in sorted order.
        Duplicates are dropped while keeping the first occurrence.
        """
        all_attr_key_set = {attr_key for c in annotation_count_list for attr_key in c.annotation_attribute_counts}
        if prior_attribute_columns is not None:
            remaining_columns = sorted(all_attr_key_set - set(prior_attribute_columns))  # type: ignore[arg-type]
            value_columns = prior_attribute_columns + remaining_columns
        else:
            value_columns = sorted(all_attr_key_set)

        # dict preserves insertion order, so this removes duplicates without reordering.
        return list(dict.fromkeys(value_columns))

    def get_columns_by_input_data(
        self,
        annotation_count_list: list[AnnotationCountByInputData],
        prior_attribute_columns: Optional[list[tuple[str, str, str]]] = None,
    ) -> list[tuple[str, str, str]]:
        """Return every column of the per-input-data table (basic columns, then count columns)."""
        value_columns = self._value_columns(annotation_count_list, prior_attribute_columns=prior_attribute_columns)
        return list(self._BASIC_COLUMNS_BY_INPUT_DATA) + value_columns

    def get_columns_by_task(
        self,
        annotation_count_list: list[AnnotationCountByTask],
        prior_attribute_columns: Optional[list[tuple[str, str, str]]] = None,
    ) -> list[tuple[str, str, str]]:
        """Return every column of the per-task table (basic columns, then count columns)."""
        value_columns = self._value_columns(annotation_count_list, prior_attribute_columns=prior_attribute_columns)
        return list(self._BASIC_COLUMNS_BY_TASK) + value_columns

    @staticmethod
    def _build_df(
        rows: list[dict[tuple[str, str, str], Any]],
        basic_columns: Collection[tuple[str, str, str]],
        value_columns: list[tuple[str, str, str]],
    ) -> pandas.DataFrame:
        """Create the DataFrame and replace NaN in the count columns with 0."""
        columns = list(basic_columns) + value_columns
        df = pandas.DataFrame(rows, columns=pandas.MultiIndex.from_tuples(columns))
        # Only count columns are filled; a missing basic value (e.g. frame_no) stays empty.
        return df.fillna(dict.fromkeys(value_columns, 0))

    def create_df_by_input_data(
        self,
        annotation_count_list: list[AnnotationCountByInputData],
        *,
        prior_attribute_columns: Optional[list[tuple[str, str, str]]] = None,
    ) -> pandas.DataFrame:
        """Create a DataFrame with one row per input data item.

        Args:
            annotation_count_list: Per-input-data counts.
            prior_attribute_columns: Count columns that must come first, in this order.
        """

        def to_cell(c: AnnotationCountByInputData) -> dict[tuple[str, str, str], Any]:
            cell: dict[tuple[str, str, str], Any] = {
                ("task_id", "", ""): c.task_id,
                ("task_status", "", ""): c.task_status.value,
                ("task_phase", "", ""): c.task_phase.value,
                ("task_phase_stage", "", ""): c.task_phase_stage,
                ("input_data_id", "", ""): c.input_data_id,
                ("input_data_name", "", ""): c.input_data_name,
                ("frame_no", "", ""): c.frame_no,
            }
            cell.update(c.annotation_attribute_counts)  # type: ignore[arg-type]
            return cell

        # Scan the counts for attribute keys once and reuse the result for both the column list and fillna.
        value_columns = self._value_columns(annotation_count_list, prior_attribute_columns=prior_attribute_columns)
        return self._build_df([to_cell(e) for e in annotation_count_list], self._BASIC_COLUMNS_BY_INPUT_DATA, value_columns)

    def create_df_by_task(
        self,
        annotation_count_list: list[AnnotationCountByTask],
        *,
        prior_attribute_columns: Optional[list[tuple[str, str, str]]] = None,
    ) -> pandas.DataFrame:
        """Create a DataFrame with one row per task.

        Args:
            annotation_count_list: Per-task counts.
            prior_attribute_columns: Count columns that must come first, in this order.
        """

        def to_cell(c: AnnotationCountByTask) -> dict[tuple[str, str, str], Any]:
            cell: dict[tuple[str, str, str], Any] = {
                ("task_id", "", ""): c.task_id,
                ("task_status", "", ""): c.task_status.value,
                ("task_phase", "", ""): c.task_phase.value,
                ("task_phase_stage", "", ""): c.task_phase_stage,
                ("input_data_count", "", ""): c.input_data_count,
            }
            cell.update(c.annotation_attribute_counts)  # type: ignore[arg-type]
            return cell

        # Scan the counts for attribute keys once and reuse the result for both the column list and fillna.
        value_columns = self._value_columns(annotation_count_list, prior_attribute_columns=prior_attribute_columns)
        return self._build_df([to_cell(e) for e in annotation_count_list], self._BASIC_COLUMNS_BY_TASK, value_columns)
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
def get_frame_no_map(task_json_path: Path) -> dict[tuple[str, str], int]:
    """Build a ``(task_id, input_data_id) -> frame number`` map from a task JSON file.

    Args:
        task_json_path: JSON file holding a list of tasks, each with ``task_id``
            and ``input_data_id_list``.

    Returns:
        Frame numbers are 1-based (matching what the screen shows): the position
        of the input data inside its task's ``input_data_id_list``.
    """
    with task_json_path.open(encoding="utf-8") as f:
        tasks = json.load(f)

    return {
        (task["task_id"], input_data_id): frame_no
        for task in tasks
        for frame_no, input_data_id in enumerate(task["input_data_id_list"], start=1)
    }
|
|
441
|
+
|
|
442
|
+
|
|
443
|
+
def get_attribute_columns(attribute_names: list[tuple[str, str]]) -> list[tuple[str, str, str]]:
    """Expand each ``(label name, attribute name)`` into its "filled" and "empty" columns.

    The input order is kept, and "filled" always precedes "empty" for the same attribute.
    """
    attribute_columns: list[tuple[str, str, str]] = []
    for label_name, attribute_name in attribute_names:
        attribute_columns.append((label_name, attribute_name, "filled"))
        attribute_columns.append((label_name, attribute_name, "empty"))
    return attribute_columns
|
|
448
|
+
|
|
449
|
+
|
|
450
|
+
class ListAnnotationAttributeFilledCountMain:
    """Count filled/empty attribute values and write the result as CSV or JSON."""

    def __init__(self, service: annofabapi.Resource) -> None:
        self.service = service

    def print_annotation_count_csv_by_input_data(
        self, annotation_count_list: list[AnnotationCountByInputData], output_file: Path, *, attribute_names: Optional[list[tuple[str, str]]]
    ) -> None:
        """Write the per-input-data counts as CSV.

        When ``attribute_names`` is given, its filled/empty columns are placed first in that order.
        """
        attribute_columns = get_attribute_columns(attribute_names) if attribute_names is not None else None
        csv_builder = AnnotationCountCsvByAttribute()
        print_csv(csv_builder.create_df_by_input_data(annotation_count_list, prior_attribute_columns=attribute_columns), output_file)

    def print_annotation_count_csv_by_task(
        self, annotation_count_list: list[AnnotationCountByTask], output_file: Path, *, attribute_names: Optional[list[tuple[str, str]]]
    ) -> None:
        """Write the per-task counts as CSV.

        When ``attribute_names`` is given, its filled/empty columns are placed first in that order.
        """
        attribute_columns = get_attribute_columns(attribute_names) if attribute_names is not None else None
        csv_builder = AnnotationCountCsvByAttribute()
        print_csv(csv_builder.create_df_by_task(annotation_count_list, prior_attribute_columns=attribute_columns), output_file)

    def print_annotation_count(
        self,
        annotation_path: Path,
        output_file: Path,
        group_by: GroupBy,
        output_format: FormatArgument,
        *,
        project_id: Optional[str] = None,
        include_flag_attribute: bool = False,
        task_json_path: Optional[Path] = None,
        target_task_ids: Optional[Collection[str]] = None,
        task_query: Optional[TaskQuery] = None,
    ) -> None:
        """Count the attributes found under ``annotation_path`` and write them to ``output_file``.

        Args:
            annotation_path: Annotation zip file, or the directory it was extracted to.
            output_file: Destination of the CSV/JSON output.
            group_by: Aggregate per task or per input data item.
            output_format: CSV, JSON or pretty JSON. Any other format writes nothing.
            project_id: When given, the annotation specs of this project are loaded.
            include_flag_attribute: When False and ``project_id`` is given, flag
                attributes are excluded and only attributes from the specs are counted.
            task_json_path: Task JSON file used to resolve frame numbers.
            target_task_ids: When given, only these tasks are counted.
            task_query: When given, only tasks matching this query are counted.
        """
        target_attribute_names: Optional[list[AttributeNameKey]] = None
        if project_id is not None:
            annotation_specs: Optional[AnnotationSpecs] = AnnotationSpecs(self.service, project_id)
            if not include_flag_attribute:
                target_attribute_names = annotation_specs.attribute_name_keys(excluded_attribute_types=[AdditionalDataDefinitionType.FLAG])

        frame_no_map = None
        if task_json_path is not None:
            frame_no_map = get_frame_no_map(task_json_path)

        counter = ListAnnotationCounterByInputData(frame_no_map=frame_no_map, target_attribute_names=target_attribute_names)
        counts_by_input_data = counter.get_annotation_count_list(
            annotation_path,
            target_task_ids=target_task_ids,
            task_query=task_query,
        )

        is_csv = output_format == FormatArgument.CSV
        is_json = output_format in [FormatArgument.PRETTY_JSON, FormatArgument.JSON]

        if group_by == GroupBy.INPUT_DATA_ID:
            logger.info(f"{len(counts_by_input_data)} 件の入力データに含まれるアノテーション数の情報を出力します。")
            if is_csv:
                self.print_annotation_count_csv_by_input_data(counts_by_input_data, output_file=output_file, attribute_names=target_attribute_names)
            elif is_json:
                print_json(
                    [e.to_dict(encode_json=True) for e in counts_by_input_data],
                    is_pretty=output_format == FormatArgument.PRETTY_JSON,
                    output=output_file,
                )
            return

        if group_by == GroupBy.TASK_ID:
            counts_by_task = convert_annotation_count_list_by_input_data_to_by_task(counts_by_input_data)
            logger.info(f"{len(counts_by_task)} 件のタスクに含まれるアノテーション数の情報を出力します。")
            if is_csv:
                self.print_annotation_count_csv_by_task(counts_by_task, output_file=output_file, attribute_names=target_attribute_names)
            elif is_json:
                print_json(
                    [e.to_dict(encode_json=True) for e in counts_by_task],
                    is_pretty=output_format == FormatArgument.PRETTY_JSON,
                    output=output_file,
                )
            return

        # Every GroupBy member is handled above; this line is only reached for an unexpected value.
        raise assert_noreturn(group_by)
|
|
538
|
+
|
|
539
|
+
|
|
540
|
+
class ListAnnotationAttributeFilledCount(CommandLine):
    """Command-line handler of the ``statistics list_annotation_attribute_filled_count`` subcommand."""

    # Prefix of command-line error messages. The subcommand lives under ``statistics``.
    COMMON_MESSAGE = "annofabcli statistics list_annotation_attribute_filled_count: error:"

    def validate(self, args: argparse.Namespace) -> bool:
        """Check the argument combination; print an error to stderr and return False when invalid.

        At least one of ``--project_id`` and ``--annotation`` must be given,
        otherwise there is no annotation source.
        """
        if args.project_id is None and args.annotation is None:
            print(  # noqa: T201
                f"{self.COMMON_MESSAGE} argument --project_id: '--annotation'が未指定のときは、'--project_id' を指定してください。",
                file=sys.stderr,
            )
            return False

        return True

    def main(self) -> None:
        """Run the subcommand: resolve the annotation source, then count and print.

        Exits with ``COMMAND_LINE_ERROR_STATUS_CODE`` when the arguments are invalid.
        """
        args = self.args

        if not self.validate(args):
            sys.exit(COMMAND_LINE_ERROR_STATUS_CODE)

        project_id: Optional[str] = args.project_id
        if project_id is not None:
            super().validate_project(project_id, project_member_roles=[ProjectMemberRole.OWNER, ProjectMemberRole.TRAINING_DATA_USER])

        annotation_path = Path(args.annotation) if args.annotation is not None else None

        task_id_list = annofabcli.common.cli.get_list_from_args(args.task_id) if args.task_id is not None else None
        task_query = TaskQuery.from_dict(annofabcli.common.cli.get_json_from_args(args.task_query)) if args.task_query is not None else None

        group_by = GroupBy(args.group_by)
        output_file: Path = args.output
        output_format = FormatArgument(args.format)
        main_obj = ListAnnotationAttributeFilledCountMain(self.service)

        downloading_obj = DownloadingFile(self.service)

        # A temporary directory is used instead of `NamedTemporaryFile` because the latter
        # raises `PermissionError` on Windows.
        # Reference: https://qiita.com/yuji38kwmt/items/c6f50e1fc03dafdcdda0
        with tempfile.TemporaryDirectory() as str_temp_dir:
            # The task JSON is only needed to look up frame numbers, and frame numbers are
            # only output when grouping by input data, so skip the download otherwise.
            task_json_path: Optional[Path] = None
            if project_id is not None and group_by == GroupBy.INPUT_DATA_ID:
                task_json_path = Path(str_temp_dir) / f"{project_id}__task.json"
                downloading_obj.download_task_json(
                    project_id,
                    dest_path=str(task_json_path),
                )

            if annotation_path is None:
                # validate() guarantees this; the assert only narrows the type for the checker.
                assert project_id is not None
                annotation_path = Path(str_temp_dir) / f"{project_id}__annotation.zip"
                downloading_obj.download_annotation_zip(
                    project_id,
                    dest_path=str(annotation_path),
                    is_latest=args.latest,
                )

            # Must run inside the `with` block: the downloaded files live in the temporary directory.
            main_obj.print_annotation_count(
                annotation_path=annotation_path,
                project_id=project_id,
                task_json_path=task_json_path,
                group_by=group_by,
                output_format=output_format,
                output_file=output_file,
                target_task_ids=task_id_list,
                task_query=task_query,
                include_flag_attribute=args.include_flag_attribute,
            )
|
|
613
|
+
|
|
614
|
+
|
|
615
|
+
def main(args: argparse.Namespace) -> None:
    """Entry point of the subcommand: log in, then run the command with the parsed arguments."""
    service = build_annofabapi_resource_and_login(args)
    command = ListAnnotationAttributeFilledCount(service, AnnofabApiFacade(service), args)
    command.main()
|
|
619
|
+
|
|
620
|
+
|
|
621
|
+
def parse_args(parser: argparse.ArgumentParser) -> None:
    """Register the options of this subcommand on ``parser`` and set ``main`` as its handler."""
    argument_parser = ArgumentParser(parser)

    # Help texts are kept in locals so the option definitions below stay compact.
    annotation_help = "アノテーションzip、またはzipを展開したディレクトリを指定します。指定しない場合はAnnofabからダウンロードします。"
    project_id_help = "project_id。``--annotation`` が未指定のときは必須です。``--annotation`` が指定されているときに ``--project_id`` を指定すると、アノテーション仕様を参照して、集計対象の属性やCSV列順が決まります。"  # noqa: E501
    group_by_help = "アノテーションの個数をどの単位で集約するかを指定してます。"
    include_flag_attribute_help = (
        "指定した場合は、On/Off属性(チェックボックス)も集計対象にします。"
        "On/Off属性は基本的に常に「入力されている」と判定されるため、デフォルトでは集計対象外にしています。"
        "``--project_id`` が指定されているときのみ有効なオプションです。"
    )
    task_query_help = (
        "集計対象タスクを絞り込むためのクエリ条件をJSON形式で指定します。使用できるキーは task_id, status, phase, phase_stage です。"
        " ``file://`` を先頭に付けると、JSON形式のファイルを指定できます。"
    )
    latest_help = "``--annotation`` を指定しないとき、最新のアノテーションzipを参照します。このオプションを指定すると、アノテーションzipを更新するのに数分待ちます。"  # noqa: E501

    parser.add_argument("--annotation", type=str, help=annotation_help)

    parser.add_argument("-p", "--project_id", type=str, help=project_id_help)

    parser.add_argument(
        "--group_by",
        type=str,
        choices=[GroupBy.TASK_ID.value, GroupBy.INPUT_DATA_ID.value],
        default=GroupBy.TASK_ID.value,
        help=group_by_help,
    )

    parser.add_argument("--include_flag_attribute", action="store_true", help=include_flag_attribute_help)

    argument_parser.add_format(
        choices=[FormatArgument.CSV, FormatArgument.JSON, FormatArgument.PRETTY_JSON],
        default=FormatArgument.CSV,
    )

    argument_parser.add_output()

    parser.add_argument("-tq", "--task_query", type=str, help=task_query_help)
    argument_parser.add_task_id(required=False)

    parser.add_argument("--latest", action="store_true", help=latest_help)

    parser.set_defaults(subcommand_func=main)
|
|
676
|
+
|
|
677
|
+
|
|
678
|
+
def add_parser(subparsers: Optional[argparse._SubParsersAction] = None) -> argparse.ArgumentParser:
    """Create the ``list_annotation_attribute_filled_count`` parser, register its options and return it."""
    parser = annofabcli.common.cli.add_parser(
        subparsers,
        "list_annotation_attribute_filled_count",
        "値が入力されている属性の個数を、タスクごとまたは入力データごとに集計します。",
    )
    parse_args(parser)
    return parser
|