annofabcli 1.109.0__py3-none-any.whl → 1.111.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- annofabcli/annotation_zip/list_annotation_bounding_box_2d.py +49 -37
- annofabcli/annotation_zip/list_range_annotation.py +308 -0
- annofabcli/annotation_zip/list_single_point_annotation.py +317 -0
- annofabcli/annotation_zip/subcommand_annotation_zip.py +6 -0
- annofabcli/annotation_zip/validate_annotation.py +393 -0
- annofabcli/common/download.py +97 -0
- annofabcli/statistics/list_annotation_area.py +2 -3
- annofabcli/statistics/list_annotation_attribute_filled_count.py +35 -10
- annofabcli/statistics/list_annotation_count.py +39 -14
- annofabcli/statistics/list_annotation_duration.py +4 -6
- annofabcli/statistics/summarize_task_count.py +53 -33
- annofabcli/statistics/summarize_task_count_by_task_id_group.py +30 -13
- annofabcli/statistics/summarize_task_count_by_user.py +32 -15
- annofabcli/statistics/visualization/dataframe/annotation_count.py +31 -3
- annofabcli/statistics/visualization/dataframe/annotation_duration.py +121 -0
- annofabcli/statistics/visualize_statistics.py +83 -5
- annofabcli/task/complete_tasks.py +2 -2
- annofabcli/task/put_tasks.py +1 -1
- {annofabcli-1.109.0.dist-info → annofabcli-1.111.0.dist-info}/METADATA +1 -1
- {annofabcli-1.109.0.dist-info → annofabcli-1.111.0.dist-info}/RECORD +23 -19
- {annofabcli-1.109.0.dist-info → annofabcli-1.111.0.dist-info}/WHEEL +0 -0
- {annofabcli-1.109.0.dist-info → annofabcli-1.111.0.dist-info}/entry_points.txt +0 -0
- {annofabcli-1.109.0.dist-info → annofabcli-1.111.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -505,7 +505,7 @@ class AttributeCountCsv:
|
|
|
505
505
|
# `task_id`列など`basic_columns`も`fillna`対象だが、nanではないはずので問題ない
|
|
506
506
|
df.fillna(0, inplace=True)
|
|
507
507
|
|
|
508
|
-
print_csv(df, output=
|
|
508
|
+
print_csv(df, output=output_file)
|
|
509
509
|
|
|
510
510
|
def print_csv_by_input_data(
|
|
511
511
|
self,
|
|
@@ -553,7 +553,7 @@ class AttributeCountCsv:
|
|
|
553
553
|
value_columns = self._value_columns(counter_list, prior_attribute_columns)
|
|
554
554
|
df = df.fillna(dict.fromkeys(value_columns, 0))
|
|
555
555
|
|
|
556
|
-
print_csv(df, output=
|
|
556
|
+
print_csv(df, output=output_file)
|
|
557
557
|
|
|
558
558
|
|
|
559
559
|
class LabelCountCsv:
|
|
@@ -612,7 +612,7 @@ class LabelCountCsv:
|
|
|
612
612
|
# NaNを0に変換する
|
|
613
613
|
# `basic_columns`は必ずnanではないので、すべての列に対してfillnaを実行しても問題ないはず
|
|
614
614
|
df.fillna(0, inplace=True)
|
|
615
|
-
print_csv(df, output=
|
|
615
|
+
print_csv(df, output=output_file)
|
|
616
616
|
|
|
617
617
|
def print_csv_by_input_data(
|
|
618
618
|
self,
|
|
@@ -659,7 +659,7 @@ class LabelCountCsv:
|
|
|
659
659
|
value_columns = self._value_columns(counter_list, prior_label_columns)
|
|
660
660
|
df = df.fillna(dict.fromkeys(value_columns, 0))
|
|
661
661
|
|
|
662
|
-
print_csv(df, output=
|
|
662
|
+
print_csv(df, output=output_file)
|
|
663
663
|
|
|
664
664
|
|
|
665
665
|
class AnnotationSpecs:
|
|
@@ -1099,15 +1099,13 @@ class ListAnnotationCount(CommandLine):
|
|
|
1099
1099
|
|
|
1100
1100
|
downloading_obj = DownloadingFile(self.service)
|
|
1101
1101
|
|
|
1102
|
-
|
|
1103
|
-
# https://qiita.com/yuji38kwmt/items/c6f50e1fc03dafdcdda0 参考
|
|
1104
|
-
with tempfile.TemporaryDirectory() as str_temp_dir:
|
|
1102
|
+
def download_and_process_annotation(temp_dir: Path, *, is_latest: bool, annotation_path: Optional[Path]) -> None:
|
|
1105
1103
|
# タスク全件ファイルは、フレーム番号を参照するのに利用する
|
|
1106
1104
|
if project_id is not None:
|
|
1107
|
-
task_json_path =
|
|
1108
|
-
downloading_obj.download_task_json(
|
|
1105
|
+
task_json_path = downloading_obj.download_task_json_to_dir(
|
|
1109
1106
|
project_id,
|
|
1110
|
-
|
|
1107
|
+
temp_dir,
|
|
1108
|
+
is_latest=is_latest,
|
|
1111
1109
|
)
|
|
1112
1110
|
else:
|
|
1113
1111
|
task_json_path = None
|
|
@@ -1126,16 +1124,37 @@ class ListAnnotationCount(CommandLine):
|
|
|
1126
1124
|
|
|
1127
1125
|
if annotation_path is None:
|
|
1128
1126
|
assert project_id is not None
|
|
1129
|
-
annotation_path =
|
|
1130
|
-
downloading_obj.download_annotation_zip(
|
|
1127
|
+
annotation_path = downloading_obj.download_annotation_zip_to_dir(
|
|
1131
1128
|
project_id,
|
|
1132
|
-
|
|
1133
|
-
is_latest=
|
|
1129
|
+
temp_dir,
|
|
1130
|
+
is_latest=is_latest,
|
|
1134
1131
|
)
|
|
1135
1132
|
func(annotation_path=annotation_path)
|
|
1136
1133
|
else:
|
|
1137
1134
|
func(annotation_path=annotation_path)
|
|
1138
1135
|
|
|
1136
|
+
if project_id is not None:
|
|
1137
|
+
if args.temp_dir is not None:
|
|
1138
|
+
download_and_process_annotation(temp_dir=args.temp_dir, is_latest=args.latest, annotation_path=annotation_path)
|
|
1139
|
+
else:
|
|
1140
|
+
with tempfile.TemporaryDirectory() as str_temp_dir:
|
|
1141
|
+
download_and_process_annotation(temp_dir=Path(str_temp_dir), is_latest=args.latest, annotation_path=annotation_path)
|
|
1142
|
+
else:
|
|
1143
|
+
# プロジェクトIDが指定されていない場合は、アノテーションパスが必須なので、一時ディレクトリは不要
|
|
1144
|
+
assert annotation_path is not None
|
|
1145
|
+
func = partial(
|
|
1146
|
+
main_obj.print_annotation_counter,
|
|
1147
|
+
project_id=project_id,
|
|
1148
|
+
task_json_path=None,
|
|
1149
|
+
group_by=group_by,
|
|
1150
|
+
csv_type=csv_type,
|
|
1151
|
+
arg_format=arg_format,
|
|
1152
|
+
output_file=output_file,
|
|
1153
|
+
target_task_ids=task_id_list,
|
|
1154
|
+
task_query=task_query,
|
|
1155
|
+
)
|
|
1156
|
+
func(annotation_path=annotation_path)
|
|
1157
|
+
|
|
1139
1158
|
|
|
1140
1159
|
def parse_args(parser: argparse.ArgumentParser) -> None:
|
|
1141
1160
|
argument_parser = ArgumentParser(parser)
|
|
@@ -1194,6 +1213,12 @@ def parse_args(parser: argparse.ArgumentParser) -> None:
|
|
|
1194
1213
|
help="``--annotation`` を指定しないとき、最新のアノテーションzipを参照します。このオプションを指定すると、アノテーションzipを更新するのに数分待ちます。",
|
|
1195
1214
|
)
|
|
1196
1215
|
|
|
1216
|
+
parser.add_argument(
|
|
1217
|
+
"--temp_dir",
|
|
1218
|
+
type=Path,
|
|
1219
|
+
help="指定したディレクトリに、アノテーションZIPなどの一時ファイルをダウンロードします。",
|
|
1220
|
+
)
|
|
1221
|
+
|
|
1197
1222
|
parser.set_defaults(subcommand_func=main)
|
|
1198
1223
|
|
|
1199
1224
|
|
|
@@ -586,17 +586,15 @@ class ListAnnotationDuration(CommandLine):
|
|
|
586
586
|
|
|
587
587
|
def download_and_print_annotation_duration(project_id: str, temp_dir: Path, *, is_latest: bool, annotation_path: Optional[Path]) -> None:
|
|
588
588
|
if annotation_path is None:
|
|
589
|
-
annotation_path =
|
|
590
|
-
downloading_obj.download_annotation_zip(
|
|
589
|
+
annotation_path = downloading_obj.download_annotation_zip_to_dir(
|
|
591
590
|
project_id,
|
|
592
|
-
|
|
591
|
+
temp_dir,
|
|
593
592
|
is_latest=is_latest,
|
|
594
593
|
)
|
|
595
594
|
|
|
596
|
-
input_data_json_path =
|
|
597
|
-
downloading_obj.download_input_data_json(
|
|
595
|
+
input_data_json_path = downloading_obj.download_input_data_json_to_dir(
|
|
598
596
|
project_id,
|
|
599
|
-
|
|
597
|
+
temp_dir,
|
|
600
598
|
is_latest=is_latest,
|
|
601
599
|
)
|
|
602
600
|
|
|
@@ -2,6 +2,7 @@ import argparse
|
|
|
2
2
|
import json
|
|
3
3
|
import logging
|
|
4
4
|
import sys
|
|
5
|
+
import tempfile
|
|
5
6
|
from enum import Enum
|
|
6
7
|
from pathlib import Path
|
|
7
8
|
from typing import Optional
|
|
@@ -135,17 +136,11 @@ class SummarizeTaskCount(CommandLine):
|
|
|
135
136
|
project, _ = self.service.api.get_project(project_id)
|
|
136
137
|
return project["configuration"]["number_of_inspections"]
|
|
137
138
|
|
|
138
|
-
def summarize_task_count(self, project_id: str, *, task_json_path: Optional[Path], is_latest: bool,
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
else:
|
|
142
|
-
# タスク全件ファイルをダウンロードするので、オーナロールかアノテーションユーザロールであることを確認する。
|
|
143
|
-
super().validate_project(project_id, project_member_roles=[ProjectMemberRole.OWNER, ProjectMemberRole.TRAINING_DATA_USER])
|
|
139
|
+
def summarize_task_count(self, project_id: str, *, task_json_path: Optional[Path], is_latest: bool, temp_dir: Optional[Path] = None) -> None:
|
|
140
|
+
# タスク全件ファイルをダウンロードするので、オーナロールかアノテーションユーザロールであることを確認する。
|
|
141
|
+
super().validate_project(project_id, project_member_roles=[ProjectMemberRole.OWNER, ProjectMemberRole.TRAINING_DATA_USER])
|
|
144
142
|
|
|
145
|
-
|
|
146
|
-
task_list = self.service.wrapper.get_all_tasks(project_id)
|
|
147
|
-
else:
|
|
148
|
-
task_list = self.get_task_list_with_downloading_file(project_id, task_json_path, is_latest=is_latest)
|
|
143
|
+
task_list = self.get_task_list_with_downloading_file(project_id, task_json_path, is_latest=is_latest, temp_dir=temp_dir)
|
|
149
144
|
|
|
150
145
|
if len(task_list) == 0:
|
|
151
146
|
logger.info("タスクが0件のため、出力しません。")
|
|
@@ -155,32 +150,57 @@ class SummarizeTaskCount(CommandLine):
|
|
|
155
150
|
task_count_df = create_task_count_summary(task_list, number_of_inspections=number_of_inspections)
|
|
156
151
|
annofabcli.common.utils.print_csv(task_count_df, output=self.output)
|
|
157
152
|
|
|
158
|
-
def get_task_list_with_downloading_file(self, project_id: str, task_json_path: Optional[Path], is_latest: bool) -> list[Task]: # noqa: FBT001
|
|
153
|
+
def get_task_list_with_downloading_file(self, project_id: str, task_json_path: Optional[Path], is_latest: bool, temp_dir: Optional[Path] = None) -> list[Task]:  # noqa: FBT001
    """
    Return the task list parsed from a task JSON file.

    Args:
        project_id: Project whose task JSON is downloaded when ``task_json_path`` is None.
        task_json_path: Existing task JSON file. When given, it is read as-is and nothing is downloaded.
        is_latest: Forwarded to ``DownloadingFile.download_task_json_to_dir``.
        temp_dir: Directory that receives the downloaded file. When None, a temporary
            directory is created and removed again before this method returns.

    Returns:
        The object decoded from the task JSON file.
    """

    def load_task_list(json_path: Path) -> list[Task]:
        # Single place where the JSON file is opened and decoded.
        with json_path.open(encoding="utf-8") as f:
            return json.load(f)

    if task_json_path is not None:
        # The caller supplied the file, so no download is required.
        return load_task_list(task_json_path)

    downloading_obj = DownloadingFile(self.service)

    if temp_dir is not None:
        downloaded_path = downloading_obj.download_task_json_to_dir(
            project_id,
            temp_dir,
            is_latest=is_latest,
        )
        return load_task_list(downloaded_path)

    # No directory was given: download and read inside a throwaway directory,
    # because the file disappears as soon as the `with` block exits.
    with tempfile.TemporaryDirectory() as str_temp_dir:
        downloaded_path = downloading_obj.download_task_json_to_dir(
            project_id,
            Path(str_temp_dir),
            is_latest=is_latest,
        )
        return load_task_list(downloaded_path)
|
|
173
185
|
|
|
174
186
|
def main(self) -> None:
    """
    Run the command: summarize task counts for the project given on the command line.

    When ``--temp_dir`` is not given, a temporary directory is created for the
    duration of the call and passed on as the download directory.
    """
    args = self.args
    project_id = args.project_id
    task_json_path = None if args.task_json is None else Path(args.task_json)

    def run_summary(work_dir: Optional[Path]) -> None:
        # Every argument except the working directory is fixed by the command line.
        self.summarize_task_count(
            project_id,
            task_json_path=task_json_path,
            is_latest=args.latest,
            temp_dir=work_dir,
        )

    if args.temp_dir is None:
        with tempfile.TemporaryDirectory() as str_temp_dir:
            run_summary(Path(str_temp_dir))
        return

    run_summary(args.temp_dir)
|
|
184
204
|
|
|
185
205
|
|
|
186
206
|
def parse_args(parser: argparse.ArgumentParser) -> None:
|
|
@@ -201,9 +221,9 @@ def parse_args(parser: argparse.ArgumentParser) -> None:
|
|
|
201
221
|
)
|
|
202
222
|
|
|
203
223
|
parser.add_argument(
|
|
204
|
-
"--
|
|
205
|
-
|
|
206
|
-
help="
|
|
224
|
+
"--temp_dir",
|
|
225
|
+
type=Path,
|
|
226
|
+
help="指定したディレクトリに、一時ファイルをダウンロードします。",
|
|
207
227
|
)
|
|
208
228
|
|
|
209
229
|
argument_parser.add_output()
|
|
@@ -3,7 +3,9 @@ from __future__ import annotations
|
|
|
3
3
|
import argparse
|
|
4
4
|
import json
|
|
5
5
|
import logging
|
|
6
|
+
import tempfile
|
|
6
7
|
from enum import Enum
|
|
8
|
+
from pathlib import Path
|
|
7
9
|
from typing import Optional
|
|
8
10
|
|
|
9
11
|
import pandas
|
|
@@ -153,20 +155,29 @@ class SummarizeTaskCountByTaskId(CommandLine):
|
|
|
153
155
|
project_id = args.project_id
|
|
154
156
|
super().validate_project(project_id, [ProjectMemberRole.OWNER, ProjectMemberRole.TRAINING_DATA_USER])
|
|
155
157
|
|
|
156
|
-
|
|
157
|
-
|
|
158
|
+
def download_and_process_task_data(temp_dir: Path) -> None:
|
|
159
|
+
if args.task_json is not None:
|
|
160
|
+
task_json_path = args.task_json
|
|
161
|
+
else:
|
|
162
|
+
downloading_obj = DownloadingFile(self.service)
|
|
163
|
+
task_json_path = downloading_obj.download_task_json_to_dir(
|
|
164
|
+
project_id,
|
|
165
|
+
temp_dir,
|
|
166
|
+
is_latest=args.latest,
|
|
167
|
+
wait_options=DEFAULT_WAIT_OPTIONS,
|
|
168
|
+
)
|
|
169
|
+
|
|
170
|
+
with open(task_json_path, encoding="utf-8") as f: # noqa: PTH123
|
|
171
|
+
task_list = json.load(f)
|
|
172
|
+
|
|
173
|
+
df = create_task_count_summary_df(task_list, task_id_delimiter=args.task_id_delimiter, task_id_groups=get_json_from_args(args.task_id_groups))
|
|
174
|
+
self.print_summarize_task_count(df)
|
|
175
|
+
|
|
176
|
+
if args.temp_dir is not None:
|
|
177
|
+
download_and_process_task_data(temp_dir=args.temp_dir)
|
|
158
178
|
else:
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
downloading_obj = DownloadingFile(self.service)
|
|
163
|
-
downloading_obj.download_task_json(project_id, dest_path=str(task_json_path), is_latest=args.latest, wait_options=DEFAULT_WAIT_OPTIONS)
|
|
164
|
-
|
|
165
|
-
with open(task_json_path, encoding="utf-8") as f: # noqa: PTH123
|
|
166
|
-
task_list = json.load(f)
|
|
167
|
-
|
|
168
|
-
df = create_task_count_summary_df(task_list, task_id_delimiter=args.task_id_delimiter, task_id_groups=get_json_from_args(args.task_id_groups))
|
|
169
|
-
self.print_summarize_task_count(df)
|
|
179
|
+
with tempfile.TemporaryDirectory() as str_temp_dir:
|
|
180
|
+
download_and_process_task_data(temp_dir=Path(str_temp_dir))
|
|
170
181
|
|
|
171
182
|
|
|
172
183
|
def parse_args(parser: argparse.ArgumentParser) -> None:
|
|
@@ -200,6 +211,12 @@ def parse_args(parser: argparse.ArgumentParser) -> None:
|
|
|
200
211
|
help="最新のタスク一覧ファイルを参照します。このオプションを指定すると、タスク一覧ファイルを更新するのに数分待ちます。",
|
|
201
212
|
)
|
|
202
213
|
|
|
214
|
+
parser.add_argument(
|
|
215
|
+
"--temp_dir",
|
|
216
|
+
type=Path,
|
|
217
|
+
help="指定したディレクトリに、一時ファイルをダウンロードします。",
|
|
218
|
+
)
|
|
219
|
+
|
|
203
220
|
argument_parser.add_output()
|
|
204
221
|
|
|
205
222
|
parser.set_defaults(subcommand_func=main)
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
import json
|
|
3
3
|
import logging
|
|
4
|
+
import tempfile
|
|
4
5
|
from enum import Enum
|
|
6
|
+
from pathlib import Path
|
|
5
7
|
from typing import Optional
|
|
6
8
|
|
|
7
9
|
import pandas
|
|
@@ -121,23 +123,32 @@ class SummarizeTaskCountByUser(CommandLine):
|
|
|
121
123
|
project_id = args.project_id
|
|
122
124
|
super().validate_project(project_id, [ProjectMemberRole.OWNER, ProjectMemberRole.TRAINING_DATA_USER])
|
|
123
125
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
126
|
+
def download_and_process_task_data(temp_dir: Path) -> None:
|
|
127
|
+
if args.task_json is not None:
|
|
128
|
+
task_json_path = args.task_json
|
|
129
|
+
else:
|
|
130
|
+
downloading_obj = DownloadingFile(self.service)
|
|
131
|
+
task_json_path = downloading_obj.download_task_json_to_dir(
|
|
132
|
+
project_id,
|
|
133
|
+
temp_dir,
|
|
134
|
+
is_latest=args.latest,
|
|
135
|
+
wait_options=DEFAULT_WAIT_OPTIONS,
|
|
136
|
+
)
|
|
137
|
+
|
|
138
|
+
with open(task_json_path, encoding="utf-8") as f: # noqa: PTH123
|
|
139
|
+
task_list = json.load(f)
|
|
140
|
+
|
|
141
|
+
df = self.create_summary_df(project_id, task_list)
|
|
142
|
+
if len(df) > 0:
|
|
143
|
+
self.print_summarize_df(df)
|
|
144
|
+
else:
|
|
145
|
+
logger.error("出力対象データが0件のため、出力しません。")
|
|
135
146
|
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
self.print_summarize_df(df)
|
|
147
|
+
if args.temp_dir is not None:
|
|
148
|
+
download_and_process_task_data(temp_dir=args.temp_dir)
|
|
139
149
|
else:
|
|
140
|
-
|
|
150
|
+
with tempfile.TemporaryDirectory() as str_temp_dir:
|
|
151
|
+
download_and_process_task_data(temp_dir=Path(str_temp_dir))
|
|
141
152
|
|
|
142
153
|
|
|
143
154
|
def parse_args(parser: argparse.ArgumentParser) -> None:
|
|
@@ -157,6 +168,12 @@ def parse_args(parser: argparse.ArgumentParser) -> None:
|
|
|
157
168
|
help="最新のタスク一覧ファイルを参照します。このオプションを指定すると、タスク一覧ファイルを更新するのに数分待ちます。",
|
|
158
169
|
)
|
|
159
170
|
|
|
171
|
+
parser.add_argument(
|
|
172
|
+
"--temp_dir",
|
|
173
|
+
type=Path,
|
|
174
|
+
help="指定したディレクトリに、一時ファイルをダウンロードします。",
|
|
175
|
+
)
|
|
176
|
+
|
|
160
177
|
argument_parser.add_output()
|
|
161
178
|
|
|
162
179
|
parser.set_defaults(subcommand_func=main)
|
|
@@ -42,7 +42,15 @@ class AnnotationCount:
|
|
|
42
42
|
self.df = df
|
|
43
43
|
|
|
44
44
|
@classmethod
|
|
45
|
-
def from_annotation_zip(
|
|
45
|
+
def from_annotation_zip(
|
|
46
|
+
cls,
|
|
47
|
+
annotation_zip: Path,
|
|
48
|
+
project_id: str,
|
|
49
|
+
*,
|
|
50
|
+
get_annotation_count_func: Optional[Callable[[dict[str, Any]], int]] = None,
|
|
51
|
+
include_labels: Optional[list[str]] = None,
|
|
52
|
+
exclude_labels: Optional[list[str]] = None,
|
|
53
|
+
) -> AnnotationCount:
|
|
46
54
|
"""
|
|
47
55
|
アノテーションZIPファイルからインスタンスを生成します。
|
|
48
56
|
|
|
@@ -51,15 +59,35 @@ class AnnotationCount:
|
|
|
51
59
|
project_id: プロジェクトID。DataFrameに格納するために使用します。
|
|
52
60
|
get_annotation_count_func: アノテーション数を算出するための関数。
|
|
53
61
|
引数はdict, 戻り値はintの関数です。未指定の場合は、detailsの数をアノテーション数になります。
|
|
62
|
+
include_labels: 集計対象に含めるラベル名のリスト
|
|
63
|
+
exclude_labels: 集計対象から除外するラベル名のリスト
|
|
54
64
|
|
|
55
65
|
"""
|
|
56
|
-
|
|
57
66
|
logger.debug(f"アノテーションZIPファイルを読み込みます。 :: project_id='{project_id}', file='{annotation_zip!s}'")
|
|
58
67
|
|
|
59
68
|
def get_annotation_count_default(simple_annotation: dict[str, Any]) -> int:
|
|
60
69
|
return len(simple_annotation["details"])
|
|
61
70
|
|
|
62
|
-
|
|
71
|
+
def get_annotation_count_with_filter(simple_annotation: dict[str, Any]) -> int:
|
|
72
|
+
details = simple_annotation["details"]
|
|
73
|
+
|
|
74
|
+
if include_labels is not None:
|
|
75
|
+
details = [d for d in details if d["label"] in include_labels]
|
|
76
|
+
elif exclude_labels is not None:
|
|
77
|
+
details = [d for d in details if d["label"] not in exclude_labels]
|
|
78
|
+
|
|
79
|
+
if get_annotation_count_func is not None:
|
|
80
|
+
# カスタム関数にフィルタ済みのdetailsを渡すため、一時的にsimple_annotationを変更
|
|
81
|
+
filtered_annotation = simple_annotation.copy()
|
|
82
|
+
filtered_annotation["details"] = details
|
|
83
|
+
return get_annotation_count_func(filtered_annotation)
|
|
84
|
+
else:
|
|
85
|
+
return len(details)
|
|
86
|
+
|
|
87
|
+
get_annotation_count: Callable[[dict[str, Any]], int]
|
|
88
|
+
if include_labels is not None or exclude_labels is not None:
|
|
89
|
+
get_annotation_count = get_annotation_count_with_filter
|
|
90
|
+
elif get_annotation_count_func is not None:
|
|
63
91
|
get_annotation_count = get_annotation_count_func
|
|
64
92
|
else:
|
|
65
93
|
get_annotation_count = get_annotation_count_default
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from collections import defaultdict
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
import pandas
|
|
9
|
+
from annofabapi.parser import lazy_parse_simple_annotation_zip
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class AnnotationDuration:
    """
    Thin wrapper around a DataFrame that stores annotation durations.

    Each row is identified by the pair (`project_id`, `task_id`).
    """

    @classmethod
    def columns(cls) -> list[str]:
        """Return the column names the wrapped DataFrame must contain."""
        return ["project_id", "task_id", "annotation_duration_second"]

    @classmethod
    def required_columns_exist(cls, df: pandas.DataFrame) -> bool:
        """
        Tell whether `df` contains every required column.

        Returns:
            True when no column listed by `columns()` is missing from `df`.
        """
        return set(cls.columns()) <= set(df.columns)

    def __init__(self, df: pandas.DataFrame) -> None:
        """
        Wrap `df`.

        Raises:
            ValueError: `df` lacks at least one required column.
        """
        if self.required_columns_exist(df):
            self.df = df
            return
        raise ValueError(f"引数`df`には、{self.columns()}の列が必要です。 :: {df.columns=}")

    @classmethod
    def from_annotation_zip(
        cls,
        annotation_zip: Path,
        project_id: str,
        *,
        include_labels: Optional[list[str]] = None,
        exclude_labels: Optional[list[str]] = None,
    ) -> AnnotationDuration:
        """
        Build an instance from an annotation ZIP file.

        For every JSON in the ZIP, the lengths of the details whose data `_type`
        is "Range" are added up (`end - begin`, divided by 1000) and accumulated
        per task.

        Args:
            annotation_zip: Path of the annotation ZIP file.
            project_id: Project ID; stored in the resulting DataFrame.
            include_labels: Labels to count. When given, `exclude_labels` is not consulted.
            exclude_labels: Labels to leave out of the total.
        """
        logger.debug(f"アノテーションZIPファイルから区間アノテーションの長さを計算します。 :: project_id='{project_id}', file='{annotation_zip!s}'")

        def is_target(detail: dict) -> bool:
            # The label is only looked up when a filter was actually requested.
            if include_labels is not None:
                return detail["label"] in include_labels
            if exclude_labels is not None:
                return detail["label"] not in exclude_labels
            return True

        # key: (project_id, task_id), value: accumulated duration in seconds
        seconds_per_task: dict[tuple[str, str], float] = defaultdict(float)

        for index, parser in enumerate(lazy_parse_simple_annotation_zip(annotation_zip)):
            simple_annotation = parser.load_json()

            seconds_in_file = 0.0
            for detail in simple_annotation["details"]:
                if not is_target(detail):
                    continue

                data = detail["data"]
                if data["_type"] != "Range":
                    # Only range annotations contribute to the total.
                    continue

                begin_ms, end_ms = data["begin"], data["end"]
                seconds_in_file += (end_ms - begin_ms) / 1000.0  # milliseconds -> seconds

            seconds_per_task[(project_id, parser.task_id)] += seconds_in_file

            if (index + 1) % 10000 == 0:
                logger.debug(f"{index + 1}件のアノテーションJSONを読み込みました。 :: project_id='{project_id}', file='{annotation_zip!s}'")

        rows = [(*key, seconds) for key, seconds in seconds_per_task.items()]
        if not rows:
            return cls.empty()

        return cls(pandas.DataFrame(rows, columns=cls.columns()))

    def is_empty(self) -> bool:
        """
        Tell whether the wrapped DataFrame has no rows.

        Returns:
            True when the DataFrame contains zero rows.
        """
        return self.df.shape[0] == 0

    @classmethod
    def empty(cls) -> AnnotationDuration:
        """Create an instance that wraps a DataFrame with the required columns and no rows."""
        column_dtypes: dict[str, str] = {
            "project_id": "string",
            "task_id": "string",
            "annotation_duration_second": "float64",
        }
        empty_df = pandas.DataFrame(columns=cls.columns())
        return cls(empty_df.astype(column_dtypes))
|