annofabcli 1.98.0__py3-none-any.whl → 1.100.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,684 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import collections
5
+ import json
6
+ import logging
7
+ import sys
8
+ import tempfile
9
+ import zipfile
10
+ from collections import defaultdict
11
+ from collections.abc import Collection, Iterator
12
+ from dataclasses import dataclass, field
13
+ from enum import Enum
14
+ from functools import partial
15
+ from pathlib import Path
16
+ from typing import Any, Literal, Optional, Protocol, Union
17
+
18
+ import annofabapi
19
+ import pandas
20
+ from annofabapi.models import ProjectMemberRole
21
+ from annofabapi.parser import (
22
+ SimpleAnnotationParser,
23
+ lazy_parse_simple_annotation_dir,
24
+ lazy_parse_simple_annotation_zip,
25
+ )
26
+ from annofabapi.pydantic_models.additional_data_definition_type import AdditionalDataDefinitionType
27
+ from annofabapi.pydantic_models.task_phase import TaskPhase
28
+ from annofabapi.pydantic_models.task_status import TaskStatus
29
+ from dataclasses_json import DataClassJsonMixin, config
30
+
31
+ import annofabcli
32
+ import annofabcli.common.cli
33
+ from annofabcli.common.cli import (
34
+ COMMAND_LINE_ERROR_STATUS_CODE,
35
+ ArgumentParser,
36
+ CommandLine,
37
+ build_annofabapi_resource_and_login,
38
+ )
39
+ from annofabcli.common.download import DownloadingFile
40
+ from annofabcli.common.enums import FormatArgument
41
+ from annofabcli.common.facade import (
42
+ AnnofabApiFacade,
43
+ TaskQuery,
44
+ match_annotation_with_task_query,
45
+ )
46
+ from annofabcli.common.type_util import assert_noreturn
47
+ from annofabcli.common.utils import print_csv, print_json
48
+ from annofabcli.statistics.list_annotation_count import AnnotationSpecs
49
+
50
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


AttributeValueType = Literal["filled", "empty"]
AttributeValueKey = tuple[str, str, AttributeValueType]
"""
Key of an attribute value bucket.
Expressed as tuple[label_name_en, attribute_name_en, filled | empty].
"""


AttributeNameKey = tuple[str, str]
"""
Key of an attribute name.
Expressed as tuple[label_name_en, attribute_name_en].
"""


AttributeKeys = Collection[Collection]
69
+
70
+
71
class HasAnnotationAttributeCounts(Protocol):
    """Structural type for records that expose per-attribute annotation counts."""

    # Keyed by (label_name_en, attribute_name_en, "filled" | "empty"); the value is a count.
    annotation_attribute_counts: dict[AttributeValueKey, int]
73
+
74
+
75
class GroupBy(Enum):
    """Unit by which annotation counts are aggregated."""

    TASK_ID = "task_id"
    INPUT_DATA_ID = "input_data_id"
78
+
79
+
80
def encode_annotation_count_by_attribute(
    annotation_count_by_attribute: dict[AttributeValueKey, int],
) -> dict[str, dict[str, dict[str, int]]]:
    """Convert flat per-attribute counts into a nested, JSON-friendly dict.

    Args:
        annotation_count_by_attribute: counts keyed by
            ``(label_name, attribute_name, attribute_value_type)``.

    Returns:
        ``{label_name: {attribute_name: {attribute_value_type: annotation_count}}}``
        built from plain ``dict`` objects.
    """
    # Plain dicts are used on purpose: an auto-vivifying defaultdict would silently
    # insert a new empty entry whenever a caller merely *reads* a missing key.
    nested: dict[str, dict[str, dict[str, int]]] = {}
    for (label_name, attribute_name, attribute_value_type), annotation_count in annotation_count_by_attribute.items():
        nested.setdefault(label_name, {}).setdefault(attribute_name, {})[attribute_value_type] = annotation_count
    return nested
95
+
96
+
97
@dataclass(frozen=True)
class AnnotationCountByInputData(DataClassJsonMixin, HasAnnotationAttributeCounts):
    """
    Annotation count information for one input data.
    """

    task_id: str
    task_status: TaskStatus
    task_phase: TaskPhase
    task_phase_stage: int

    input_data_id: str
    input_data_name: str

    annotation_attribute_counts: dict[AttributeValueKey, int] = field(
        metadata=config(encoder=encode_annotation_count_by_attribute),
    )
    """Number of annotations per attribute value.
    key: tuple[label name (English), attribute name (English), kind of attribute value], value: annotation count
    """
    frame_no: Optional[int] = None
    """Frame number (1-based). Optional because the annotation JSON does not contain this information."""
121
+
122
+
123
@dataclass(frozen=True)
class AnnotationCountByTask(DataClassJsonMixin, HasAnnotationAttributeCounts):
    """
    Annotation count information for one task.
    """

    task_id: str
    task_status: TaskStatus
    task_phase: TaskPhase
    task_phase_stage: int
    input_data_count: int

    annotation_attribute_counts: dict[AttributeValueKey, int] = field(
        metadata=config(encoder=encode_annotation_count_by_attribute),
    )
    """Number of annotations per attribute value.
    key: tuple[label name (English), attribute name (English), kind of attribute value], value: annotation count
    """
143
+
144
+
145
def lazy_parse_simple_annotation_by_input_data(annotation_path: Path) -> Iterator[SimpleAnnotationParser]:
    """Dispatch to the directory or zip parser depending on what ``annotation_path`` is.

    Args:
        annotation_path: path to an annotation zip, or to a directory.

    Returns:
        The result of ``lazy_parse_simple_annotation_dir`` for a directory, or of
        ``lazy_parse_simple_annotation_zip`` for a zip file.

    Raises:
        RuntimeError: the path does not exist, or is neither a directory nor a zip file.
    """
    if not annotation_path.exists():
        raise RuntimeError(f"'{annotation_path}' は存在しません。")

    if annotation_path.is_dir():
        return lazy_parse_simple_annotation_dir(annotation_path)

    if not zipfile.is_zipfile(str(annotation_path)):
        raise RuntimeError(f"'{annotation_path}'は、zipファイルまたはディレクトリではありません。")

    return lazy_parse_simple_annotation_zip(annotation_path)
155
+
156
+
157
def convert_annotation_count_list_by_input_data_to_by_task(annotation_count_list: list[AnnotationCountByInputData]) -> list[AnnotationCountByTask]:
    """
    Convert per-input-data annotation counts into per-task annotation counts.

    Records are grouped by ``task_id`` (tasks keep their order of first appearance).
    For each task the attribute counts of all its records are summed, and the task
    status / phase / phase stage are copied from the first record of the group.
    """
    grouped: dict[str, list[AnnotationCountByInputData]] = collections.defaultdict(list)
    for record in annotation_count_list:
        grouped[record.task_id].append(record)

    by_task = []
    for task_id, members in grouped.items():
        totals: dict[AttributeValueKey, int] = defaultdict(int)
        for member in members:
            for attribute_key, count in member.annotation_attribute_counts.items():
                totals[attribute_key] += count

        representative = members[0]
        by_task.append(
            AnnotationCountByTask(
                task_id=task_id,
                task_status=representative.task_status,
                task_phase=representative.task_phase,
                task_phase_stage=representative.task_phase_stage,
                input_data_count=len(members),
                annotation_attribute_counts=totals,
            )
        )
    return by_task
186
+
187
+
188
class ListAnnotationCounterByInputData:
    """Methods that aggregate, per input data, how many annotations have each attribute filled or empty.

    Args:
        target_labels: labels (label_name_en) to aggregate
        target_attribute_names: attribute name keys to aggregate
        non_target_labels: labels to exclude from aggregation
        non_target_attribute_names: attribute name keys to exclude from aggregation
        frame_no_map: mapping from (task_id, input_data_id) to a frame number

    """

    def __init__(
        self,
        *,
        target_labels: Optional[Collection[str]] = None,
        non_target_labels: Optional[Collection[str]] = None,
        target_attribute_names: Optional[Collection[AttributeNameKey]] = None,
        non_target_attribute_names: Optional[Collection[AttributeNameKey]] = None,
        frame_no_map: Optional[dict[tuple[str, str], int]] = None,
    ) -> None:
        def as_optional_set(values: Optional[Collection]) -> Optional[set]:
            # ``None`` means "no filter", so it must stay distinguishable from an empty set.
            return None if values is None else set(values)

        self.target_labels = as_optional_set(target_labels)
        self.target_attribute_names = as_optional_set(target_attribute_names)
        self.non_target_labels = as_optional_set(non_target_labels)
        self.non_target_attribute_names = as_optional_set(non_target_attribute_names)
        self.frame_no_map = frame_no_map

    @staticmethod
    def _classify_attribute_value(value: Optional[Union[bool, str, float]]) -> AttributeValueType:  # noqa: FBT001
        """
        Map an attribute value stored in the annotation JSON to its dict-key category.

        ``None`` and the empty string are "empty"; every other value is "filled".
        """
        is_blank_string = isinstance(value, str) and value == ""
        return "empty" if value is None or is_blank_string else "filled"

    def get_annotation_count(self, simple_annotation: dict[str, Any]) -> AnnotationCountByInputData:
        """
        Count, for one annotation JSON, the annotations per attribute value category.

        Args:
            simple_annotation: content of an annotation JSON file

        """
        details: list[dict[str, Any]] = simple_annotation["details"]

        counts: dict[AttributeValueKey, int] = defaultdict(int)
        for detail in details:
            label = detail["label"]

            # Label filters: skip labels outside the allow-list or inside the deny-list.
            if self.target_labels is not None and label not in self.target_labels:
                continue
            if self.non_target_labels is not None and label in self.non_target_labels:
                continue

            for attribute_name, attribute_value in detail["attributes"].items():
                name_key = (label, attribute_name)
                if self.target_attribute_names is not None and name_key not in self.target_attribute_names:
                    continue
                if self.non_target_attribute_names is not None and name_key in self.non_target_attribute_names:
                    continue

                counts[(label, attribute_name, self._classify_attribute_value(attribute_value))] += 1

        frame_no: Optional[int] = None
        if self.frame_no_map is not None:
            lookup_key = (simple_annotation["task_id"], simple_annotation["input_data_id"])
            frame_no = self.frame_no_map.get(lookup_key)

        return AnnotationCountByInputData(
            task_id=simple_annotation["task_id"],
            task_phase=TaskPhase(simple_annotation["task_phase"]),
            task_phase_stage=simple_annotation["task_phase_stage"],
            task_status=TaskStatus(simple_annotation["task_status"]),
            input_data_id=simple_annotation["input_data_id"],
            input_data_name=simple_annotation["input_data_name"],
            annotation_attribute_counts=counts,
            frame_no=frame_no,
        )

    def get_annotation_count_list(
        self,
        annotation_path: Path,
        *,
        target_task_ids: Optional[Collection[str]] = None,
        task_query: Optional[TaskQuery] = None,
    ) -> list[AnnotationCountByInputData]:
        """
        Collect per-attribute annotation counts from an annotation zip or its extracted directory.

        Args:
            annotation_path: annotation zip, or the directory it was extracted to
            target_task_ids: when given, only these task ids are processed
            task_query: when given, only annotations matching this query are processed

        """
        collected = []

        wanted_task_ids = set(target_task_ids) if target_task_ids is not None else None

        parsers = lazy_parse_simple_annotation_by_input_data(annotation_path)

        logger.debug("アノテーションzip/ディレクトリを読み込み中")
        for ordinal, parser in enumerate(parsers, start=1):
            if ordinal % 1000 == 0:
                logger.debug(f"{ordinal} 件目のJSONを読み込み中")

            # Filter on task id first so that unwanted JSON files are never loaded.
            if wanted_task_ids is not None and parser.task_id not in wanted_task_ids:
                continue

            annotation_json = parser.load_json()
            if task_query is not None and not match_annotation_with_task_query(annotation_json, task_query):
                continue

            collected.append(self.get_annotation_count(annotation_json))

        return collected
310
+
311
+
312
class AnnotationCountCsvByAttribute:
    """
    Builds the DataFrames used to write per-attribute annotation counts to CSV.

    Args:
        selective_attribute_value_max_count: upper bound on the number of values of a selective attribute. Above this, the attribute is regarded as non-selective (tracking ID, annotation link, etc.). The value is only stored; no method of this class reads it.

    """  # noqa: E501

    # Leading (non-count) columns; each is a 3-tuple because the CSV header is a 3-level MultiIndex.
    _BASIC_COLUMNS_BY_INPUT_DATA = (
        ("task_id", "", ""),
        ("task_status", "", ""),
        ("task_phase", "", ""),
        ("task_phase_stage", "", ""),
        ("input_data_id", "", ""),
        ("input_data_name", "", ""),
        ("frame_no", "", ""),
    )
    _BASIC_COLUMNS_BY_TASK = (
        ("task_id", "", ""),
        ("task_status", "", ""),
        ("task_phase", "", ""),
        ("task_phase_stage", "", ""),
        ("input_data_count", "", ""),
    )

    def __init__(self, selective_attribute_value_max_count: int = 20) -> None:
        self.selective_attribute_value_max_count = selective_attribute_value_max_count

    def _value_columns(
        self, annotation_count_list: Collection[HasAnnotationAttributeCounts], *, prior_attribute_columns: Optional[list[tuple[str, str, str]]]
    ) -> list[tuple[str, str, str]]:
        """
        Return the numeric (count) columns of the CSV.

        Prior columns come first in the given order, followed by the remaining
        observed keys in sorted order. Duplicates are dropped, keeping the first.
        """
        observed_keys: set = set()
        for record in annotation_count_list:
            observed_keys.update(record.annotation_attribute_counts)

        if prior_attribute_columns is None:
            ordered = sorted(observed_keys)
        else:
            leftovers = sorted(observed_keys - set(prior_attribute_columns))  # type: ignore[arg-type]
            ordered = prior_attribute_columns + leftovers

        # dict preserves insertion order, so this removes duplicates without reordering
        return list(dict.fromkeys(ordered))

    def get_columns_by_input_data(
        self,
        annotation_count_list: list[AnnotationCountByInputData],
        prior_attribute_columns: Optional[list[tuple[str, str, str]]] = None,
    ) -> list[tuple[str, str, str]]:
        """Return all CSV columns (basic columns, then count columns) for per-input-data output."""
        count_columns = self._value_columns(annotation_count_list, prior_attribute_columns=prior_attribute_columns)
        return [*self._BASIC_COLUMNS_BY_INPUT_DATA, *count_columns]

    def get_columns_by_task(
        self,
        annotation_count_list: list[AnnotationCountByTask],
        prior_attribute_columns: Optional[list[tuple[str, str, str]]] = None,
    ) -> list[tuple[str, str, str]]:
        """Return all CSV columns (basic columns, then count columns) for per-task output."""
        count_columns = self._value_columns(annotation_count_list, prior_attribute_columns=prior_attribute_columns)
        return [*self._BASIC_COLUMNS_BY_TASK, *count_columns]

    @staticmethod
    def _build_dataframe(
        rows: list[dict[tuple[str, str, str], Any]],
        columns: list[tuple[str, str, str]],
        count_columns: list[tuple[str, str, str]],
    ) -> pandas.DataFrame:
        """Create the DataFrame and replace NaN with 0 in the count columns only."""
        frame = pandas.DataFrame(rows, columns=pandas.MultiIndex.from_tuples(columns))
        return frame.fillna(dict.fromkeys(count_columns, 0))

    def create_df_by_input_data(
        self,
        annotation_count_list: list[AnnotationCountByInputData],
        *,
        prior_attribute_columns: Optional[list[tuple[str, str, str]]] = None,
    ) -> pandas.DataFrame:
        """Build the per-input-data DataFrame; attribute keys absent from a record become 0."""

        def to_row(c: AnnotationCountByInputData) -> dict[tuple[str, str, str], Any]:
            return {
                ("task_id", "", ""): c.task_id,
                ("task_status", "", ""): c.task_status.value,
                ("task_phase", "", ""): c.task_phase.value,
                ("task_phase_stage", "", ""): c.task_phase_stage,
                ("input_data_id", "", ""): c.input_data_id,
                ("input_data_name", "", ""): c.input_data_name,
                ("frame_no", "", ""): c.frame_no,
                **c.annotation_attribute_counts,  # type: ignore[dict-item]
            }

        columns = self.get_columns_by_input_data(annotation_count_list, prior_attribute_columns)
        count_columns = self._value_columns(annotation_count_list, prior_attribute_columns=prior_attribute_columns)
        rows = [to_row(record) for record in annotation_count_list]
        return self._build_dataframe(rows, columns, count_columns)

    def create_df_by_task(
        self,
        annotation_count_list: list[AnnotationCountByTask],
        *,
        prior_attribute_columns: Optional[list[tuple[str, str, str]]] = None,
    ) -> pandas.DataFrame:
        """Build the per-task DataFrame; attribute keys absent from a record become 0."""

        def to_row(c: AnnotationCountByTask) -> dict[tuple[str, str, str], Any]:
            return {
                ("task_id", "", ""): c.task_id,
                ("task_status", "", ""): c.task_status.value,
                ("task_phase", "", ""): c.task_phase.value,
                ("task_phase_stage", "", ""): c.task_phase_stage,
                ("input_data_count", "", ""): c.input_data_count,
                **c.annotation_attribute_counts,  # type: ignore[dict-item]
            }

        columns = self.get_columns_by_task(annotation_count_list, prior_attribute_columns)
        count_columns = self._value_columns(annotation_count_list, prior_attribute_columns=prior_attribute_columns)
        rows = [to_row(record) for record in annotation_count_list]
        return self._build_dataframe(rows, columns, count_columns)
427
+
428
+
429
def get_frame_no_map(task_json_path: Path) -> dict[tuple[str, str], int]:
    """Build a ``(task_id, input_data_id) -> frame number`` mapping from a task JSON file.

    Args:
        task_json_path: JSON file holding a list of tasks; each task is read through
            its ``task_id`` and ``input_data_id_list`` keys.

    Returns:
        Mapping whose value is the 1-based position of the input data within its task.
    """
    with task_json_path.open(encoding="utf-8") as fp:
        tasks = json.load(fp)

    frame_numbers: dict = {}
    for task in tasks:
        task_id = task["task_id"]
        # numbering starts at 1 to match what the screen shows
        frame_numbers.update(
            ((task_id, input_data_id), frame_no) for frame_no, input_data_id in enumerate(task["input_data_id_list"], start=1)
        )
    return frame_numbers
441
+
442
+
443
def get_attribute_columns(attribute_names: list[tuple[str, str]]) -> list[tuple[str, str, str]]:
    """Expand each ``(label_name, attribute_name)`` pair into its "filled" and "empty" columns.

    The input order is kept; for every pair the "filled" column precedes the "empty" one.
    """
    columns: list[tuple[str, str, str]] = []
    for label_name, attribute_name in attribute_names:
        columns.append((label_name, attribute_name, "filled"))
        columns.append((label_name, attribute_name, "empty"))
    return columns
448
+
449
+
450
class ListAnnotationAttributeFilledCountMain:
    """Counts filled / empty attributes in annotations and writes the result as CSV or JSON."""

    def __init__(self, service: annofabapi.Resource) -> None:
        self.service = service

    def print_annotation_count_csv_by_input_data(
        self, annotation_count_list: list[AnnotationCountByInputData], output_file: Path, *, attribute_names: Optional[list[tuple[str, str]]]
    ) -> None:
        """Write per-input-data counts as CSV; ``attribute_names`` (if given) fixes the leading count columns."""
        prior_columns: Optional[list[tuple[str, str, str]]] = (
            get_attribute_columns(attribute_names) if attribute_names is not None else None
        )
        frame = AnnotationCountCsvByAttribute().create_df_by_input_data(annotation_count_list, prior_attribute_columns=prior_columns)
        print_csv(frame, output_file)

    def print_annotation_count_csv_by_task(
        self, annotation_count_list: list[AnnotationCountByTask], output_file: Path, *, attribute_names: Optional[list[tuple[str, str]]]
    ) -> None:
        """Write per-task counts as CSV; ``attribute_names`` (if given) fixes the leading count columns."""
        prior_columns: Optional[list[tuple[str, str, str]]] = (
            get_attribute_columns(attribute_names) if attribute_names is not None else None
        )
        frame = AnnotationCountCsvByAttribute().create_df_by_task(annotation_count_list, prior_attribute_columns=prior_columns)
        print_csv(frame, output_file)

    @staticmethod
    def _print_json(records: list, output_file: Path, output_format: FormatArgument) -> None:
        """Serialize the records with ``to_dict(encode_json=True)`` and write them as (pretty) JSON."""
        print_json(
            [record.to_dict(encode_json=True) for record in records],
            is_pretty=output_format == FormatArgument.PRETTY_JSON,
            output=output_file,
        )

    def print_annotation_count(
        self,
        annotation_path: Path,
        output_file: Path,
        group_by: GroupBy,
        output_format: FormatArgument,
        *,
        project_id: Optional[str] = None,
        include_flag_attribute: bool = False,
        task_json_path: Optional[Path] = None,
        target_task_ids: Optional[Collection[str]] = None,
        task_query: Optional[TaskQuery] = None,
    ) -> None:
        """Aggregate the annotations under ``annotation_path`` and write them to ``output_file``.

        Args:
            annotation_path: annotation zip, or the directory it was extracted to
            output_file: destination of the CSV / JSON
            group_by: aggregate per task or per input data
            output_format: CSV, JSON or PRETTY_JSON; any other value writes nothing
            project_id: when given, the annotation specs of the project are loaded
            include_flag_attribute: when False and ``project_id`` is given, only the
                attribute names returned by the annotation specs with FLAG-type
                attributes excluded are counted
            task_json_path: task JSON file used to look up frame numbers
            target_task_ids: restrict aggregation to these task ids
            task_query: restrict aggregation to annotations matching this query
        """
        target_attribute_names: Optional[list[AttributeNameKey]] = None
        if project_id is not None:
            annotation_specs = AnnotationSpecs(self.service, project_id)
            if not include_flag_attribute:
                target_attribute_names = annotation_specs.attribute_name_keys(excluded_attribute_types=[AdditionalDataDefinitionType.FLAG])

        frame_no_map = None
        if task_json_path is not None:
            frame_no_map = get_frame_no_map(task_json_path)

        counter = ListAnnotationCounterByInputData(frame_no_map=frame_no_map, target_attribute_names=target_attribute_names)
        counts_by_input_data = counter.get_annotation_count_list(
            annotation_path,
            target_task_ids=target_task_ids,
            task_query=task_query,
        )

        json_formats = (FormatArgument.PRETTY_JSON, FormatArgument.JSON)

        if group_by == GroupBy.INPUT_DATA_ID:
            logger.info(f"{len(counts_by_input_data)} 件の入力データに含まれるアノテーション数の情報を出力します。")
            if output_format == FormatArgument.CSV:
                self.print_annotation_count_csv_by_input_data(
                    counts_by_input_data, output_file=output_file, attribute_names=target_attribute_names
                )
            elif output_format in json_formats:
                self._print_json(counts_by_input_data, output_file, output_format)

        elif group_by == GroupBy.TASK_ID:
            counts_by_task = convert_annotation_count_list_by_input_data_to_by_task(counts_by_input_data)
            logger.info(f"{len(counts_by_task)} 件のタスクに含まれるアノテーション数の情報を出力します。")
            if output_format == FormatArgument.CSV:
                self.print_annotation_count_csv_by_task(counts_by_task, output_file=output_file, attribute_names=target_attribute_names)
            elif output_format in json_formats:
                self._print_json(counts_by_task, output_file, output_format)

        else:
            raise assert_noreturn(group_by)
538
+
539
+
540
class ListAnnotationAttributeFilledCount(CommandLine):
    """Command-line entry for ``annotation list_annotation_attribute_filled_count``."""

    COMMON_MESSAGE = "annofabcli annotation list_annotation_attribute_filled_count: error:"

    def validate(self, args: argparse.Namespace) -> bool:
        """Return True when at least one of ``--project_id`` / ``--annotation`` was given; otherwise report on stderr."""
        if args.project_id is not None or args.annotation is not None:
            return True

        print(  # noqa: T201
            f"{self.COMMON_MESSAGE} argument --project_id: '--annotation'が未指定のときは、'--project_id' を指定してください。",
            file=sys.stderr,
        )
        return False

    def main(self) -> None:
        """Validate the arguments, download whatever is missing, then print the counts."""
        args = self.args

        if not self.validate(args):
            sys.exit(COMMAND_LINE_ERROR_STATUS_CODE)

        project_id: Optional[str] = args.project_id
        if project_id is not None:
            super().validate_project(project_id, project_member_roles=[ProjectMemberRole.OWNER, ProjectMemberRole.TRAINING_DATA_USER])

        annotation_path = None if args.annotation is None else Path(args.annotation)

        task_id_list = None
        if args.task_id is not None:
            task_id_list = annofabcli.common.cli.get_list_from_args(args.task_id)

        task_query = None
        if args.task_query is not None:
            task_query = TaskQuery.from_dict(annofabcli.common.cli.get_json_from_args(args.task_query))

        group_by = GroupBy(args.group_by)
        output_file: Path = args.output
        output_format = FormatArgument(args.format)
        main_obj = ListAnnotationAttributeFilledCountMain(self.service)

        downloading_obj = DownloadingFile(self.service)

        # Why `NamedTemporaryFile` is not used: it raises `PermissionError` on Windows.
        # See https://qiita.com/yuji38kwmt/items/c6f50e1fc03dafdcdda0
        with tempfile.TemporaryDirectory() as str_temp_dir:
            temp_dir = Path(str_temp_dir)

            # The full task file is used to look up frame numbers.
            # It is downloaded only when grouping by input data: when aggregating by
            # task id the frame number is not output, so the file is not needed.
            task_json_path = None
            if project_id is not None and group_by == GroupBy.INPUT_DATA_ID:
                task_json_path = temp_dir / f"{project_id}__task.json"
                downloading_obj.download_task_json(
                    project_id,
                    dest_path=str(task_json_path),
                )

            include_flag_attribute = args.include_flag_attribute

            if annotation_path is None:
                assert project_id is not None
                annotation_path = temp_dir / f"{project_id}__annotation.zip"
                downloading_obj.download_annotation_zip(
                    project_id,
                    dest_path=str(annotation_path),
                    is_latest=args.latest,
                )

            # Must run inside the `with` block: the downloaded files live in the temp dir.
            main_obj.print_annotation_count(
                annotation_path=annotation_path,
                project_id=project_id,
                task_json_path=task_json_path,
                group_by=group_by,
                output_format=output_format,
                output_file=output_file,
                target_task_ids=task_id_list,
                task_query=task_query,
                include_flag_attribute=include_flag_attribute,
            )
613
+
614
+
615
def main(args: argparse.Namespace) -> None:
    """Log in, wrap the resource in a facade, and run the command with ``args``."""
    service = build_annofabapi_resource_and_login(args)
    command = ListAnnotationAttributeFilledCount(service, AnnofabApiFacade(service), args)
    command.main()
619
+
620
+
621
def parse_args(parser: argparse.ArgumentParser) -> None:
    """Register this subcommand's options on ``parser`` and bind ``main`` as its handler."""
    argument_parser = ArgumentParser(parser)

    annotation_help = "アノテーションzip、またはzipを展開したディレクトリを指定します。指定しない場合はAnnofabからダウンロードします。"
    parser.add_argument("--annotation", type=str, help=annotation_help)

    project_id_help = "project_id。``--annotation`` が未指定のときは必須です。``--annotation`` が指定されているときに ``--project_id`` を指定すると、アノテーション仕様を参照して、集計対象の属性やCSV列順が決まります。"  # noqa: E501
    parser.add_argument("-p", "--project_id", type=str, help=project_id_help)

    parser.add_argument(
        "--group_by",
        type=str,
        choices=[GroupBy.TASK_ID.value, GroupBy.INPUT_DATA_ID.value],
        default=GroupBy.TASK_ID.value,
        help="アノテーションの個数をどの単位で集約するかを指定してます。",
    )

    include_flag_attribute_help = (
        "指定した場合は、On/Off属性(チェックボックス)も集計対象にします。"
        "On/Off属性は基本的に常に「入力されている」と判定されるため、デフォルトでは集計対象外にしています。"
        "``--project_id`` が指定されているときのみ有効なオプションです。"
    )
    parser.add_argument("--include_flag_attribute", action="store_true", help=include_flag_attribute_help)

    argument_parser.add_format(
        choices=[FormatArgument.CSV, FormatArgument.JSON, FormatArgument.PRETTY_JSON],
        default=FormatArgument.CSV,
    )

    argument_parser.add_output()

    task_query_help = (
        "集計対象タスクを絞り込むためのクエリ条件をJSON形式で指定します。使用できるキーは task_id, status, phase, phase_stage です。"
        " ``file://`` を先頭に付けると、JSON形式のファイルを指定できます。"
    )
    parser.add_argument("-tq", "--task_query", type=str, help=task_query_help)
    argument_parser.add_task_id(required=False)

    latest_help = "``--annotation`` を指定しないとき、最新のアノテーションzipを参照します。このオプションを指定すると、アノテーションzipを更新するのに数分待ちます。"  # noqa: E501
    parser.add_argument("--latest", action="store_true", help=latest_help)

    parser.set_defaults(subcommand_func=main)
676
+
677
+
678
def add_parser(subparsers: Optional[argparse._SubParsersAction] = None) -> argparse.ArgumentParser:
    """Create the ``list_annotation_attribute_filled_count`` subcommand parser and register its options."""
    parser = annofabcli.common.cli.add_parser(
        subparsers,
        "list_annotation_attribute_filled_count",
        "値が入力されている属性の個数を、タスクごとまたは入力データごとに集計します。",
    )
    parse_args(parser)
    return parser