PyPI - annofabcli - Versions diffs - 1.109.0__py3-none-any.whl → 1.111.0__py3-none-any.whl - Mend

annofabcli 1.109.0__py3-none-any.whl → 1.111.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

annofabcli/statistics/list_annotation_count.py CHANGED Viewed

@@ -505,7 +505,7 @@ class AttributeCountCsv:
         # `task_id`列など`basic_columns`も`fillna`対象だが、nanではないはずので問題ない
         df.fillna(0, inplace=True)
-        print_csv(df, output=str(output_file))
+        print_csv(df, output=output_file)
     def print_csv_by_input_data(
         self,
@@ -553,7 +553,7 @@ class AttributeCountCsv:
         value_columns = self._value_columns(counter_list, prior_attribute_columns)
         df = df.fillna(dict.fromkeys(value_columns, 0))
-        print_csv(df, output=str(output_file))
+        print_csv(df, output=output_file)
 class LabelCountCsv:
@@ -612,7 +612,7 @@ class LabelCountCsv:
         # NaNを0に変換する
         # `basic_columns`は必ずnanではないので、すべての列に対してfillnaを実行しても問題ないはず
         df.fillna(0, inplace=True)
-        print_csv(df, output=str(output_file))
+        print_csv(df, output=output_file)
     def print_csv_by_input_data(
         self,
@@ -659,7 +659,7 @@ class LabelCountCsv:
         value_columns = self._value_columns(counter_list, prior_label_columns)
         df = df.fillna(dict.fromkeys(value_columns, 0))
-        print_csv(df, output=str(output_file))
+        print_csv(df, output=output_file)
 class AnnotationSpecs:
@@ -1099,15 +1099,13 @@ class ListAnnotationCount(CommandLine):
         downloading_obj = DownloadingFile(self.service)
-        # `NamedTemporaryFile`を使わない理由: Windowsで`PermissionError`が発生するため
-        # https://qiita.com/yuji38kwmt/items/c6f50e1fc03dafdcdda0 参考
-        with tempfile.TemporaryDirectory() as str_temp_dir:
+        def download_and_process_annotation(temp_dir: Path, *, is_latest: bool, annotation_path: Optional[Path]) -> None:
             # タスク全件ファイルは、フレーム番号を参照するのに利用する
             if project_id is not None:
-                task_json_path = Path(str_temp_dir) / f"{project_id}__task.json"
-                downloading_obj.download_task_json(
+                task_json_path = downloading_obj.download_task_json_to_dir(
                     project_id,
-                    dest_path=str(task_json_path),
+                    temp_dir,
+                    is_latest=is_latest,
                 )
             else:
                 task_json_path = None
@@ -1126,16 +1124,37 @@ class ListAnnotationCount(CommandLine):
             if annotation_path is None:
                 assert project_id is not None
-                annotation_path = Path(str_temp_dir) / f"{project_id}__annotation.zip"
-                downloading_obj.download_annotation_zip(
+                annotation_path = downloading_obj.download_annotation_zip_to_dir(
                     project_id,
-                    dest_path=str(annotation_path),
-                    is_latest=args.latest,
+                    temp_dir,
+                    is_latest=is_latest,
                 )
                 func(annotation_path=annotation_path)
             else:
                 func(annotation_path=annotation_path)
+        if project_id is not None:
+            if args.temp_dir is not None:
+                download_and_process_annotation(temp_dir=args.temp_dir, is_latest=args.latest, annotation_path=annotation_path)
+            else:
+                with tempfile.TemporaryDirectory() as str_temp_dir:
+                    download_and_process_annotation(temp_dir=Path(str_temp_dir), is_latest=args.latest, annotation_path=annotation_path)
+        else:
+            # プロジェクトIDが指定されていない場合は、アノテーションパスが必須なので、一時ディレクトリは不要
+            assert annotation_path is not None
+            func = partial(
+                main_obj.print_annotation_counter,
+                project_id=project_id,
+                task_json_path=None,
+                group_by=group_by,
+                csv_type=csv_type,
+                arg_format=arg_format,
+                output_file=output_file,
+                target_task_ids=task_id_list,
+                task_query=task_query,
+            )
+            func(annotation_path=annotation_path)
 def parse_args(parser: argparse.ArgumentParser) -> None:
     argument_parser = ArgumentParser(parser)
@@ -1194,6 +1213,12 @@ def parse_args(parser: argparse.ArgumentParser) -> None:
         help="``--annotation`` を指定しないとき、最新のアノテーションzipを参照します。このオプションを指定すると、アノテーションzipを更新するのに数分待ちます。",
     )
+    parser.add_argument(
+        "--temp_dir",
+        type=Path,
+        help="指定したディレクトリに、アノテーションZIPなどの一時ファイルをダウンロードします。",
+    )
     parser.set_defaults(subcommand_func=main)

annofabcli/statistics/list_annotation_duration.py CHANGED Viewed

@@ -586,17 +586,15 @@ class ListAnnotationDuration(CommandLine):
         def download_and_print_annotation_duration(project_id: str, temp_dir: Path, *, is_latest: bool, annotation_path: Optional[Path]) -> None:
             if annotation_path is None:
-                annotation_path = temp_dir / f"{project_id}__annotation.zip"
-                downloading_obj.download_annotation_zip(
+                annotation_path = downloading_obj.download_annotation_zip_to_dir(
                     project_id,
-                    dest_path=annotation_path,
+                    temp_dir,
                     is_latest=is_latest,
                 )
-            input_data_json_path = temp_dir / f"{project_id}__input_data.json"
-            downloading_obj.download_input_data_json(
+            input_data_json_path = downloading_obj.download_input_data_json_to_dir(
                 project_id,
-                dest_path=input_data_json_path,
+                temp_dir,
                 is_latest=is_latest,
             )

annofabcli/statistics/summarize_task_count.py CHANGED Viewed

@@ -2,6 +2,7 @@ import argparse
 import json
 import logging
 import sys
+import tempfile
 from enum import Enum
 from pathlib import Path
 from typing import Optional
@@ -135,17 +136,11 @@ class SummarizeTaskCount(CommandLine):
         project, _ = self.service.api.get_project(project_id)
         return project["configuration"]["number_of_inspections"]
-    def summarize_task_count(self, project_id: str, *, task_json_path: Optional[Path], is_latest: bool, is_execute_get_tasks_api: bool) -> None:
-        if is_execute_get_tasks_api:
-            super().validate_project(project_id)
-        else:
-            # タスク全件ファイルをダウンロードするので、オーナロールかアノテーションユーザロールであることを確認する。
-            super().validate_project(project_id, project_member_roles=[ProjectMemberRole.OWNER, ProjectMemberRole.TRAINING_DATA_USER])
+    def summarize_task_count(self, project_id: str, *, task_json_path: Optional[Path], is_latest: bool, temp_dir: Optional[Path] = None) -> None:
+        # タスク全件ファイルをダウンロードするので、オーナロールかアノテーションユーザロールであることを確認する。
+        super().validate_project(project_id, project_member_roles=[ProjectMemberRole.OWNER, ProjectMemberRole.TRAINING_DATA_USER])
-        if is_execute_get_tasks_api:
-            task_list = self.service.wrapper.get_all_tasks(project_id)
-        else:
-            task_list = self.get_task_list_with_downloading_file(project_id, task_json_path, is_latest=is_latest)
+        task_list = self.get_task_list_with_downloading_file(project_id, task_json_path, is_latest=is_latest, temp_dir=temp_dir)
         if len(task_list) == 0:
             logger.info("タスクが0件のため、出力しません。")
@@ -155,32 +150,57 @@ class SummarizeTaskCount(CommandLine):
         task_count_df = create_task_count_summary(task_list, number_of_inspections=number_of_inspections)
         annofabcli.common.utils.print_csv(task_count_df, output=self.output)
-    def get_task_list_with_downloading_file(self, project_id: str, task_json_path: Optional[Path], is_latest: bool) -> list[Task]:  # noqa: FBT001
+    def get_task_list_with_downloading_file(self, project_id: str, task_json_path: Optional[Path], is_latest: bool, temp_dir: Optional[Path] = None) -> list[Task]:  # noqa: FBT001
         if task_json_path is None:
-            cache_dir = annofabcli.common.utils.get_cache_dir()
-            task_json_path = cache_dir / f"task-{project_id}.json"
-            downloading_obj = DownloadingFile(self.service)
-            downloading_obj.download_task_json(
-                project_id,
-                dest_path=str(task_json_path),
-                is_latest=is_latest,
-            )
-        with task_json_path.open(encoding="utf-8") as f:
-            task_list = json.load(f)
-            return task_list
+            if temp_dir is not None:
+                downloading_obj = DownloadingFile(self.service)
+                task_json_path = downloading_obj.download_task_json_to_dir(
+                    project_id,
+                    temp_dir,
+                    is_latest=is_latest,
+                )
+                with task_json_path.open(encoding="utf-8") as f:
+                    task_list = json.load(f)
+                    return task_list
+            else:
+                # 一時ディレクトリを作成してその中でダウンロードと読み取りを完結
+                with tempfile.TemporaryDirectory() as str_temp_dir:
+                    temp_dir_path = Path(str_temp_dir)
+                    downloading_obj = DownloadingFile(self.service)
+                    task_json_path = downloading_obj.download_task_json_to_dir(
+                        project_id,
+                        temp_dir_path,
+                        is_latest=is_latest,
+                    )
+                    with task_json_path.open(encoding="utf-8") as f:
+                        task_list = json.load(f)
+                        return task_list
+        else:
+            # task_json_pathが指定されている場合
+            with task_json_path.open(encoding="utf-8") as f:
+                task_list = json.load(f)
+                return task_list
     def main(self) -> None:
         args = self.args
         project_id = args.project_id
         task_json_path = Path(args.task_json) if args.task_json is not None else None
-        self.summarize_task_count(
-            project_id,
-            task_json_path=task_json_path,
-            is_latest=args.latest,
-            is_execute_get_tasks_api=args.execute_get_tasks_api,
-        )
+        def process_task_count(temp_dir: Optional[Path]) -> None:
+            self.summarize_task_count(
+                project_id,
+                task_json_path=task_json_path,
+                is_latest=args.latest,
+                temp_dir=temp_dir,
+            )
+        if args.temp_dir is not None:
+            process_task_count(temp_dir=args.temp_dir)
+        else:
+            with tempfile.TemporaryDirectory() as str_temp_dir:
+                process_task_count(temp_dir=Path(str_temp_dir))
 def parse_args(parser: argparse.ArgumentParser) -> None:
@@ -201,9 +221,9 @@ def parse_args(parser: argparse.ArgumentParser) -> None:
     )
     parser.add_argument(
-        "--execute_get_tasks_api",
-        action="store_true",
-        help="[EXPERIMENTAL] ``getTasks`` APIを実行して、タスク情報を参照します。タスク数が少ないプロジェクトで、最新のタスク情報を参照したいときに利用できます。",
+        "--temp_dir",
+        type=Path,
+        help="指定したディレクトリに、一時ファイルをダウンロードします。",
     )
     argument_parser.add_output()

annofabcli/statistics/summarize_task_count_by_task_id_group.py CHANGED Viewed

@@ -3,7 +3,9 @@ from __future__ import annotations
 import argparse
 import json
 import logging
+import tempfile
 from enum import Enum
+from pathlib import Path
 from typing import Optional
 import pandas
@@ -153,20 +155,29 @@ class SummarizeTaskCountByTaskId(CommandLine):
         project_id = args.project_id
         super().validate_project(project_id, [ProjectMemberRole.OWNER, ProjectMemberRole.TRAINING_DATA_USER])
-        if args.task_json is not None:
-            task_json_path = args.task_json
+        def download_and_process_task_data(temp_dir: Path) -> None:
+            if args.task_json is not None:
+                task_json_path = args.task_json
+            else:
+                downloading_obj = DownloadingFile(self.service)
+                task_json_path = downloading_obj.download_task_json_to_dir(
+                    project_id,
+                    temp_dir,
+                    is_latest=args.latest,
+                    wait_options=DEFAULT_WAIT_OPTIONS,
+                )
+            with open(task_json_path, encoding="utf-8") as f:  # noqa: PTH123
+                task_list = json.load(f)
+            df = create_task_count_summary_df(task_list, task_id_delimiter=args.task_id_delimiter, task_id_groups=get_json_from_args(args.task_id_groups))
+            self.print_summarize_task_count(df)
+        if args.temp_dir is not None:
+            download_and_process_task_data(temp_dir=args.temp_dir)
         else:
-            cache_dir = annofabcli.common.utils.get_cache_dir()
-            task_json_path = cache_dir / f"{project_id}-task.json"
-            downloading_obj = DownloadingFile(self.service)
-            downloading_obj.download_task_json(project_id, dest_path=str(task_json_path), is_latest=args.latest, wait_options=DEFAULT_WAIT_OPTIONS)
-        with open(task_json_path, encoding="utf-8") as f:  # noqa: PTH123
-            task_list = json.load(f)
-        df = create_task_count_summary_df(task_list, task_id_delimiter=args.task_id_delimiter, task_id_groups=get_json_from_args(args.task_id_groups))
-        self.print_summarize_task_count(df)
+            with tempfile.TemporaryDirectory() as str_temp_dir:
+                download_and_process_task_data(temp_dir=Path(str_temp_dir))
 def parse_args(parser: argparse.ArgumentParser) -> None:
@@ -200,6 +211,12 @@ def parse_args(parser: argparse.ArgumentParser) -> None:
         help="最新のタスク一覧ファイルを参照します。このオプションを指定すると、タスク一覧ファイルを更新するのに数分待ちます。",
     )
+    parser.add_argument(
+        "--temp_dir",
+        type=Path,
+        help="指定したディレクトリに、一時ファイルをダウンロードします。",
+    )
     argument_parser.add_output()
     parser.set_defaults(subcommand_func=main)

annofabcli/statistics/summarize_task_count_by_user.py CHANGED Viewed

@@ -1,7 +1,9 @@
 import argparse
 import json
 import logging
+import tempfile
 from enum import Enum
+from pathlib import Path
 from typing import Optional
 import pandas
@@ -121,23 +123,32 @@ class SummarizeTaskCountByUser(CommandLine):
         project_id = args.project_id
         super().validate_project(project_id, [ProjectMemberRole.OWNER, ProjectMemberRole.TRAINING_DATA_USER])
-        if args.task_json is not None:
-            task_json_path = args.task_json
-        else:
-            cache_dir = annofabcli.common.utils.get_cache_dir()
-            task_json_path = cache_dir / f"{project_id}-task.json"
-            downloading_obj = DownloadingFile(self.service)
-            downloading_obj.download_task_json(project_id, dest_path=str(task_json_path), is_latest=args.latest, wait_options=DEFAULT_WAIT_OPTIONS)
-        with open(task_json_path, encoding="utf-8") as f:  # noqa: PTH123
-            task_list = json.load(f)
+        def download_and_process_task_data(temp_dir: Path) -> None:
+            if args.task_json is not None:
+                task_json_path = args.task_json
+            else:
+                downloading_obj = DownloadingFile(self.service)
+                task_json_path = downloading_obj.download_task_json_to_dir(
+                    project_id,
+                    temp_dir,
+                    is_latest=args.latest,
+                    wait_options=DEFAULT_WAIT_OPTIONS,
+                )
+            with open(task_json_path, encoding="utf-8") as f:  # noqa: PTH123
+                task_list = json.load(f)
+            df = self.create_summary_df(project_id, task_list)
+            if len(df) > 0:
+                self.print_summarize_df(df)
+            else:
+                logger.error("出力対象データが0件のため、出力しません。")
-        df = self.create_summary_df(project_id, task_list)
-        if len(df) > 0:
-            self.print_summarize_df(df)
+        if args.temp_dir is not None:
+            download_and_process_task_data(temp_dir=args.temp_dir)
         else:
-            logger.error("出力対象データが0件のため、出力しません。")
+            with tempfile.TemporaryDirectory() as str_temp_dir:
+                download_and_process_task_data(temp_dir=Path(str_temp_dir))
 def parse_args(parser: argparse.ArgumentParser) -> None:
@@ -157,6 +168,12 @@ def parse_args(parser: argparse.ArgumentParser) -> None:
         help="最新のタスク一覧ファイルを参照します。このオプションを指定すると、タスク一覧ファイルを更新するのに数分待ちます。",
     )
+    parser.add_argument(
+        "--temp_dir",
+        type=Path,
+        help="指定したディレクトリに、一時ファイルをダウンロードします。",
+    )
     argument_parser.add_output()
     parser.set_defaults(subcommand_func=main)

annofabcli/statistics/visualization/dataframe/annotation_count.py CHANGED Viewed

@@ -42,7 +42,15 @@ class AnnotationCount:
         self.df = df
     @classmethod
-    def from_annotation_zip(cls, annotation_zip: Path, project_id: str, *, get_annotation_count_func: Optional[Callable[[dict[str, Any]], int]] = None) -> AnnotationCount:
+    def from_annotation_zip(
+        cls,
+        annotation_zip: Path,
+        project_id: str,
+        *,
+        get_annotation_count_func: Optional[Callable[[dict[str, Any]], int]] = None,
+        include_labels: Optional[list[str]] = None,
+        exclude_labels: Optional[list[str]] = None,
+    ) -> AnnotationCount:
         """
         アノテーションZIPファイルからインスタンスを生成します。
@@ -51,15 +59,35 @@ class AnnotationCount:
             project_id: プロジェクトID。DataFrameに格納するために使用します。
             get_annotation_count_func: アノテーション数を算出するための関数。
                 引数はdict, 戻り値はintの関数です。未指定の場合は、detailsの数をアノテーション数になります。
+            include_labels: 集計対象に含めるラベル名のリスト
+            exclude_labels: 集計対象から除外するラベル名のリスト
         """
         logger.debug(f"アノテーションZIPファイルを読み込みます。 :: project_id='{project_id}', file='{annotation_zip!s}'")
         def get_annotation_count_default(simple_annotation: dict[str, Any]) -> int:
             return len(simple_annotation["details"])
-        if get_annotation_count_func is not None:
+        def get_annotation_count_with_filter(simple_annotation: dict[str, Any]) -> int:
+            details = simple_annotation["details"]
+            if include_labels is not None:
+                details = [d for d in details if d["label"] in include_labels]
+            elif exclude_labels is not None:
+                details = [d for d in details if d["label"] not in exclude_labels]
+            if get_annotation_count_func is not None:
+                # カスタム関数にフィルタ済みのdetailsを渡すため、一時的にsimple_annotationを変更
+                filtered_annotation = simple_annotation.copy()
+                filtered_annotation["details"] = details
+                return get_annotation_count_func(filtered_annotation)
+            else:
+                return len(details)
+        get_annotation_count: Callable[[dict[str, Any]], int]
+        if include_labels is not None or exclude_labels is not None:
+            get_annotation_count = get_annotation_count_with_filter
+        elif get_annotation_count_func is not None:
             get_annotation_count = get_annotation_count_func
         else:
             get_annotation_count = get_annotation_count_default

annofabcli/statistics/visualization/dataframe/annotation_duration.py ADDED Viewed

@@ -0,0 +1,121 @@
+from __future__ import annotations
+import logging
+from collections import defaultdict
+from pathlib import Path
+from typing import Optional
+import pandas
+from annofabapi.parser import lazy_parse_simple_annotation_zip
+logger = logging.getLogger(__name__)
+class AnnotationDuration:
+    """
+    アノテーション時間が格納されたDataFrameをラップしたクラスです。
+    DataFrameは`project_id`,`task_id`のペアがユニークなキーです。
+    """
+    @classmethod
+    def columns(cls) -> list[str]:
+        return [
+            "project_id",
+            "task_id",
+            "annotation_duration_second",
+        ]
+    @classmethod
+    def required_columns_exist(cls, df: pandas.DataFrame) -> bool:
+        """
+        必須の列が存在するかどうかを返します。
+        Returns:
+            必須の列が存在するかどうか
+        """
+        return len(set(cls.columns()) - set(df.columns)) == 0
+    def __init__(self, df: pandas.DataFrame) -> None:
+        if not self.required_columns_exist(df):
+            raise ValueError(f"引数`df`には、{self.columns()}の列が必要です。 :: {df.columns=}")
+        self.df = df
+    @classmethod
+    def from_annotation_zip(
+        cls,
+        annotation_zip: Path,
+        project_id: str,
+        *,
+        include_labels: Optional[list[str]] = None,
+        exclude_labels: Optional[list[str]] = None,
+    ) -> AnnotationDuration:
+        """
+        アノテーションZIPファイルからインスタンスを生成します。
+        Args:
+            annotation_zip: アノテーションZIPファイルのパス
+            project_id: プロジェクトID。DataFrameに格納するために使用します。
+            include_labels: 集計対象に含めるラベル名のリスト
+            exclude_labels: 集計対象から除外するラベル名のリスト
+        """
+        logger.debug(f"アノテーションZIPファイルから区間アノテーションの長さを計算します。 :: project_id='{project_id}', file='{annotation_zip!s}'")
+        result: dict[tuple[str, str], float] = defaultdict(float)  # key:(project_id, task_id), value:合計アノテーション時間（秒）
+        for index, parser in enumerate(lazy_parse_simple_annotation_zip(annotation_zip)):
+            simple_annotation = parser.load_json()
+            total_duration = 0.0
+            for detail in simple_annotation["details"]:
+                # ラベルフィルタリングの処理
+                if include_labels is not None:
+                    if detail["label"] not in include_labels:
+                        continue
+                elif exclude_labels is not None and detail["label"] in exclude_labels:
+                    continue
+                # データ形式に応じてアノテーション時間を計算
+                data = detail["data"]
+                if data["_type"] == "Range":
+                    # 区間アノテーションの場合
+                    begin = data["begin"]
+                    end = data["end"]
+                    total_duration += (end - begin) / 1000.0  # ミリ秒から秒に変換
+            result[(project_id, parser.task_id)] += total_duration
+            if (index + 1) % 10000 == 0:
+                logger.debug(f"{index + 1}件のアノテーションJSONを読み込みました。 :: project_id='{project_id}', file='{annotation_zip!s}'")
+        result_list = [(project_id, task_id, duration) for (project_id, task_id), duration in result.items()]
+        if len(result_list) == 0:
+            return cls.empty()
+        df = pandas.DataFrame(result_list, columns=cls.columns())
+        return cls(df)
+    def is_empty(self) -> bool:
+        """
+        空のデータフレームを持つかどうかを返します。
+        Returns:
+            空のデータフレームを持つかどうか
+        """
+        return len(self.df) == 0
+    @classmethod
+    def empty(cls) -> AnnotationDuration:
+        """空のデータフレームを持つインスタンスを生成します。"""
+        df_dtype: dict[str, str] = {
+            "project_id": "string",
+            "task_id": "string",
+            "annotation_duration_second": "float64",
+        }
+        df = pandas.DataFrame(columns=cls.columns()).astype(df_dtype)
+        return cls(df)

annofabcli 1.109.0__py3-none-any.whl → 1.111.0__py3-none-any.whl

annofabcli 1.109.0__py3-none-any.whl → 1.111.0__py3-none-any.whl