sai-pg 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sai/__init__.py ADDED
@@ -0,0 +1,18 @@
1
+ # Copyright 2025 Xin Huang
2
+ #
3
+ # GNU General Public License v3.0
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, please see
17
+ #
18
+ # https://www.gnu.org/licenses/gpl-3.0.en.html
sai/__main__.py ADDED
@@ -0,0 +1,73 @@
1
+ # Copyright 2025 Xin Huang
2
+ #
3
+ # GNU General Public License v3.0
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, please see
17
+ #
18
+ # https://www.gnu.org/licenses/gpl-3.0.en.html
19
+
20
+
21
+ import argparse
22
+ from sai.parsers.score_parser import add_score_parser
23
+ from sai.parsers.outlier_parser import add_outlier_parser
24
+ from sai.parsers.plot_parser import add_plot_parser
25
+
26
+
27
def _set_sigpipe_handler() -> None:
    """
    Restore the default SIGPIPE disposition when running on a POSIX system.

    On any other platform the function returns without touching signal
    handling.

    """
    import os
    import signal

    if os.name != "posix":
        return

    # SIG_DFL hands SIGPIPE back to the operating system's default action,
    # so the program is killed quietly instead of Python handling the signal.
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)
38
+
39
+
40
def _sai_cli_parser() -> argparse.ArgumentParser:
    """
    Build the top-level command-line parser for sai.

    A sub-command is mandatory and its name is stored on the parsed
    namespace as ``subcommand``. The score, outlier and plot sub-commands
    are registered in that order.

    Returns
    -------
    argparse.ArgumentParser
        The top-level parser with every sub-command attached.
    """
    cli = argparse.ArgumentParser()
    commands = cli.add_subparsers(dest="subcommand")
    # Make argparse report an error when no sub-command is given.
    commands.required = True

    for register in (add_score_parser, add_outlier_parser, add_plot_parser):
        register(commands)

    return cli
59
+
60
+
61
def main(arg_list: list = None) -> None:
    """
    Main entry for sai.

    Installs the SIGPIPE handler, parses the command line and dispatches
    to the handler of the selected sub-command.

    Parameters
    ----------
    arg_list : list, optional
        A list containing arguments for sai. Default: None, in which case
        argparse reads the arguments from ``sys.argv``.
    """
    _set_sigpipe_handler()
    parser = _sai_cli_parser()
    args = parser.parse_args(arg_list)
    # Sub-command parsers attach their handler as `runner` through
    # `set_defaults`; it receives the full parsed namespace.
    args.runner(args)


if __name__ == "__main__":
    # This module is the package's `__main__`; without this call
    # `python -m sai` would import the module and exit without doing anything.
    main()
@@ -0,0 +1,18 @@
1
+ # Copyright 2025 Xin Huang
2
+ #
3
+ # GNU General Public License v3.0
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, please see
17
+ #
18
+ # https://www.gnu.org/licenses/gpl-3.0.en.html
@@ -0,0 +1,169 @@
1
+ # Copyright 2025 Xin Huang
2
+ #
3
+ # GNU General Public License v3.0
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, please see
17
+ #
18
+ # https://www.gnu.org/licenses/gpl-3.0.en.html
19
+
20
+
21
+ import argparse
22
+ import os
23
+ import re
24
+
25
+
26
def positive_int(value: str) -> int:
    """
    Convert a command-line string to an integer strictly greater than zero.

    Parameters
    ----------
    value : str
        The text to convert. ``None`` is passed through unchanged.

    Returns
    -------
    int
        The converted integer, or ``None`` when ``value`` is ``None``.

    Raises
    ------
    argparse.ArgumentTypeError
        If the text cannot be parsed as an integer, or the integer is
        zero or negative.
    """
    if value is None:
        return value

    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"{value} is not a valid integer")

    if number > 0:
        return number
    raise argparse.ArgumentTypeError(f"{number} is not a positive integer")
53
+
54
+
55
def positive_number(value: str) -> float:
    """
    Validates if the provided string represents a positive number.

    Parameters
    ----------
    value : str
        The value to validate. ``None`` is passed through unchanged.

    Returns
    -------
    float
        The validated positive number, or ``None`` when ``value`` is
        ``None``.

    Raises
    ------
    argparse.ArgumentTypeError
        If the value is not a valid number, is not greater than zero,
        or is NaN.
    """
    if value is None:
        return value

    try:
        number = float(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"{value} is not a valid number")

    # `not number > 0` instead of `number <= 0`: every comparison with NaN
    # is False, so the latter would let "nan" through as a positive number.
    if not number > 0:
        raise argparse.ArgumentTypeError(f"{number} is not a positive number")
    return number
82
+
83
+
84
def between_zero_and_one(value: str) -> float:
    """
    Convert a command-line string to a number in the closed interval [0, 1].

    Parameters
    ----------
    value : str
        The text to convert. ``None`` is passed through unchanged.

    Returns
    -------
    float
        The converted number, or ``None`` when ``value`` is ``None``.

    Raises
    ------
    argparse.ArgumentTypeError
        If the text cannot be parsed as a number, or the number lies
        outside [0, 1].
    """
    if value is None:
        return value

    try:
        fraction = float(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"{value} is not a valid number")

    if 0 <= fraction <= 1:
        return fraction
    raise argparse.ArgumentTypeError(
        f"{fraction} is not between 0 and 1 (inclusive)"
    )
113
+
114
+
115
def existed_file(value: str) -> str:
    """
    Check that a command-line string names an existing regular file.

    Parameters
    ----------
    value : str
        The path to check. ``None`` is passed through unchanged.

    Returns
    -------
    str
        The same path that was passed in.

    Raises
    ------
    argparse.ArgumentTypeError
        If ``os.path.isfile`` reports that the path is not a file.
    """
    if value is not None and not os.path.isfile(value):
        raise argparse.ArgumentTypeError(f"{value} is not found")
    return value
138
+
139
+
140
def validate_stat_type(value: str) -> str:
    """
    Validate the input `stat_type`.

    Parameters
    ----------
    value : str
        The statistic type to validate. It must be the letter "U" or "Q"
        followed by exactly two digits, e.g. "U05" or "Q95". A bare "U"
        and one-digit forms such as "Q5" are rejected.

    Returns
    -------
    str
        The unchanged `stat_type` when it has the form "UXX" or "QXX".

    Raises
    ------
    argparse.ArgumentTypeError
        If the input does not match the expected format ("UXX" or "QXX").
    """
    pattern = r"[UQ]\d{2}"  # 'U' or 'Q' followed by exactly two digits
    if re.fullmatch(pattern, value) is None:
        raise argparse.ArgumentTypeError(
            f"Invalid --stat-type: {value}. Must be 'UXX' or 'QXX' (e.g., 'U05' for x > 0.05, 'Q95' for quantile = 0.95)."
        )
    return value
@@ -0,0 +1,76 @@
1
+ # Copyright 2025 Xin Huang
2
+ #
3
+ # GNU General Public License v3.0
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, please see
17
+ #
18
+ # https://www.gnu.org/licenses/gpl-3.0.en.html
19
+
20
+
21
+ import argparse
22
+ from sai.parsers.argument_validation import existed_file
23
+ from sai.parsers.argument_validation import between_zero_and_one
24
+ from sai.sai import outlier
25
+
26
+
27
def _run_outlier(args: argparse.Namespace) -> None:
    """
    Dispatch the outlier sub-command to ``outlier``.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments. The attributes ``score``,
        ``output`` and ``quantile`` are read and forwarded as
        ``score_file``, ``output`` and ``quantile`` respectively.
    """
    options = {
        "score_file": args.score,
        "output": args.output,
        "quantile": args.quantile,
    }
    outlier(**options)
43
+
44
+
45
def add_outlier_parser(subparsers: argparse.ArgumentParser) -> None:
    """
    Register the ``outlier`` sub-command and its options.

    Parameters
    ----------
    subparsers : argparse.ArgumentParser
        The object on which ``add_parser`` is called to create the
        sub-command parser. The new parser gets the options ``--score``,
        ``--output`` and ``--quantile`` and dispatches to ``_run_outlier``
        through its ``runner`` default.
    """
    outlier_cli = subparsers.add_parser(
        "outlier", help="Detect and output outlier rows based on quantile thresholds."
    )

    # One (flag, keyword settings) pair per option, in declaration order.
    options = (
        (
            "--score",
            dict(
                type=existed_file,
                required=True,
                help="Path to the input score file.",
            ),
        ),
        (
            "--output",
            dict(
                type=str,
                required=True,
                help="Path to save the output file.",
            ),
        ),
        (
            "--quantile",
            dict(
                type=between_zero_and_one,
                default=0.99,
                help="Quantile threshold for outlier detection, between 0 and 1. Default: 0.99.",
            ),
        ),
    )
    for flag, settings in options:
        outlier_cli.add_argument(flag, **settings)

    outlier_cli.set_defaults(runner=_run_outlier)
@@ -0,0 +1,152 @@
1
+ # Copyright 2025 Xin Huang
2
+ #
3
+ # GNU General Public License v3.0
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, please see
17
+ #
18
+ # https://www.gnu.org/licenses/gpl-3.0.en.html
19
+
20
+
21
+ import argparse
22
+ from sai.parsers.argument_validation import positive_int
23
+ from sai.parsers.argument_validation import positive_number
24
+ from sai.parsers.argument_validation import existed_file
25
+ from sai.sai import plot
26
+
27
+
28
def _run_plot(args: argparse.Namespace) -> None:
    """
    Dispatch the plot sub-command to ``plot``.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments. Each attribute listed in
        ``forwarded`` below is read from it and passed to ``plot`` as a
        keyword argument of the same name.
    """
    forwarded = (
        "u_file",
        "q_file",
        "output",
        "xlabel",
        "ylabel",
        "title",
        "figsize_x",
        "figsize_y",
        "dpi",
        "alpha",
        "marker_size",
        "marker_color",
        "marker_style",
    )
    plot(**{name: getattr(args, name) for name in forwarded})
54
+
55
+
56
def add_plot_parser(subparsers: argparse.ArgumentParser) -> None:
    """
    Initializes and configures the command-line interface parser
    for the plot subcommand.

    Parameters
    ----------
    subparsers : argparse.ArgumentParser
        The object on which ``add_parser`` is called to create the
        sub-command parser. The new parser dispatches to ``_run_plot``
        through its ``runner`` default.
    """
    # Imported locally so this function does not depend on the module's
    # import block already providing the validator.
    from sai.parsers.argument_validation import between_zero_and_one

    parser = subparsers.add_parser(
        "plot", help="Generate a scatter plot of U vs Q statistics."
    )
    parser.add_argument(
        "--u-file",
        dest="u_file",
        type=existed_file,
        required=True,
        help="Path to the U score/outlier file.",
    )
    parser.add_argument(
        "--q-file",
        dest="q_file",
        type=existed_file,
        required=True,
        help="Path to the Q score/outlier file.",
    )
    parser.add_argument(
        "--output",
        type=str,
        required=True,
        help="Path to save the output plot file. The format depends on the file extension (e.g., `.png`, `.pdf`).",
    )
    parser.add_argument(
        "--xlabel",
        type=str,
        default="Q Statistic",
        help="Label for the X-axis. Default: Q Statistic.",
    )
    parser.add_argument(
        "--ylabel",
        type=str,
        default="U Statistic",
        help="Label for the Y-axis. Default: U Statistic.",
    )
    parser.add_argument(
        "--title",
        type=str,
        default="Scatter Plot of U vs Q",
        help="Title of the plot. Default: Scatter Plot of U vs Q.",
    )
    parser.add_argument(
        "--figsize-x",
        type=positive_number,
        default=6,
        help="Width of the figure (in inches). Default: 6.",
    )
    parser.add_argument(
        "--figsize-y",
        type=positive_number,
        default=6,
        help="Height of the figure (in inches). Default: 6.",
    )
    parser.add_argument(
        "--dpi",
        type=positive_int,
        default=300,
        help="Resolution of the saved plot. Default: 300.",
    )
    # Alpha is a transparency fraction: Matplotlib defines it on [0, 1].
    # A merely "positive" check would accept values above 1 and reject 0.
    # NOTE(review): assumes `plot` passes alpha to Matplotlib unchanged - confirm.
    parser.add_argument(
        "--alpha",
        type=between_zero_and_one,
        default=0.6,
        help="Transparency level of scatter points, between 0 and 1. Default: 0.6.",
    )
    parser.add_argument(
        "--marker-size",
        dest="marker_size",
        type=positive_number,
        default=20,
        help="Size of the scatter plot markers. See matplotlib.pyplot.scatter. Default: 20.",
    )
    parser.add_argument(
        "--marker-color",
        dest="marker_color",
        type=str,
        default="blue",
        help="Color of the markers. See matplotlib.pyplot.scatter. Default: blue.",
    )
    parser.add_argument(
        "--marker-style",
        dest="marker_style",
        type=str,
        default="o",
        help="Shape of the markers. See matplotlib.pyplot.scatter. Default: o.",
    )
    parser.set_defaults(runner=_run_plot)