chromaquant 0.3.1__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chromaquant/__init__.py +9 -2
- chromaquant/data/__init__.py +14 -0
- chromaquant/data/breakdown.py +430 -0
- chromaquant/data/dataset.py +195 -0
- chromaquant/data/table.py +412 -0
- chromaquant/data/value.py +215 -0
- chromaquant/formula/__init__.py +13 -0
- chromaquant/formula/base_formulas.py +168 -0
- chromaquant/formula/formula.py +507 -0
- chromaquant/import_local_packages.py +55 -0
- chromaquant/logging_and_handling.py +76 -0
- chromaquant/match/__init__.py +13 -0
- chromaquant/match/match.py +184 -0
- chromaquant/match/match_config.py +296 -0
- chromaquant/match/match_tools.py +154 -0
- chromaquant/{Handle → results}/__init__.py +2 -2
- chromaquant/results/reporting_tools.py +190 -0
- chromaquant/results/results.py +250 -0
- chromaquant/utils/__init__.py +14 -0
- chromaquant/utils/categories.py +127 -0
- chromaquant/utils/chemical_formulas.py +104 -0
- chromaquant/utils/dataframe_processing.py +222 -0
- chromaquant/utils/file_tools.py +100 -0
- chromaquant/utils/formula_tools.py +119 -0
- chromaquant-0.5.0.dist-info/METADATA +61 -0
- chromaquant-0.5.0.dist-info/RECORD +29 -0
- {chromaquant-0.3.1.dist-info → chromaquant-0.5.0.dist-info}/WHEEL +1 -1
- {chromaquant-0.3.1.dist-info → chromaquant-0.5.0.dist-info}/licenses/LICENSE.txt +1 -1
- chromaquant-0.5.0.dist-info/licenses/LICENSES_bundled.txt +251 -0
- chromaquant/Handle/handleDirectories.py +0 -89
- chromaquant/Manual/HydroUI.py +0 -418
- chromaquant/Manual/QuantUPP.py +0 -373
- chromaquant/Manual/Quantification.py +0 -1305
- chromaquant/Manual/__init__.py +0 -10
- chromaquant/Manual/duplicateMatch.py +0 -211
- chromaquant/Manual/fpm_match.py +0 -798
- chromaquant/Manual/label-type.py +0 -179
- chromaquant/Match/AutoFpmMatch.py +0 -1133
- chromaquant/Match/__init__.py +0 -12
- chromaquant/Quant/AutoQuantification.py +0 -1329
- chromaquant/Quant/__init__.py +0 -12
- chromaquant/__main__.py +0 -493
- chromaquant/properties.json +0 -4
- chromaquant-0.3.1.dist-info/METADATA +0 -189
- chromaquant-0.3.1.dist-info/RECORD +0 -22
- chromaquant-0.3.1.dist-info/entry_points.txt +0 -2
- chromaquant-0.3.1.dist-info/licenses/LICENSES_bundled.txt +0 -1035
chromaquant/import_local_packages.py
@@ -0,0 +1,55 @@
+"""
+COPYRIGHT STATEMENT:
+
+ChromaQuant – A quantification software for complex gas chromatographic data
+
+Copyright (c) 2026, by Julia Hancock
+Affiliation: Dr. Julie Elaine Rorrer
+URL: https://www.rorrerlab.com/
+
+License: BSD 3-Clause License
+
+---
+
+FUNCTIONS FOR IMPORTING LOCAL PACKAGES
+
+Julia Hancock
+Started 1-7-2026
+
+"""
+
+import os
+import importlib.util
+import sys
+
+
+# Function to get a dictionary of subpackage directories
+def get_local_package_directories():
+
+    # Get package directory
+    app_dir = os.path.dirname(os.path.abspath(__file__))
+
+    # Get absolute directories for subpackages
+    subpack_dir = {'utils': os.path.join(app_dir, 'utils', '__init__.py'),
+                   'Signal': os.path.join(app_dir, 'Signal', '__init__.py'),
+                   'Results': os.path.join(app_dir, 'Results', '__init__.py')}
+
+    return subpack_dir
+
+
+# Define function to import from path
+def import_from_path(module_name, path):
+
+    # Define spec
+    spec = importlib.util.spec_from_file_location(module_name, path)
+
+    # Define module
+    module = importlib.util.module_from_spec(spec)
+
+    # Expand sys.modules dict
+    sys.modules[module_name] = module
+
+    # Load module
+    spec.loader.exec_module(module)
+
+    return module
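The two helpers above wrap the standard `importlib.util` loading pattern. A minimal usage sketch, not part of the diff itself; the module name passed to `import_from_path` is arbitrary and chosen here for illustration:

```python
import sys

from chromaquant.import_local_packages import (
    get_local_package_directories,
    import_from_path,
)

# Map of subpackage names to their __init__.py paths
subpack_dir = get_local_package_directories()

# Load the utils subpackage from its __init__.py; the module name
# "cq_utils" is arbitrary and only used as the sys.modules key
utils = import_from_path("cq_utils", subpack_dir["utils"])
assert "cq_utils" in sys.modules
```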
chromaquant/logging_and_handling.py
@@ -0,0 +1,76 @@
+"""
+COPYRIGHT STATEMENT:
+
+ChromaQuant – A quantification software for complex gas chromatographic data
+
+Copyright (c) 2026, by Julia Hancock
+Affiliation: Dr. Julie Elaine Rorrer
+URL: https://www.rorrerlab.com/
+
+License: BSD 3-Clause License
+
+---
+
+FUNCTIONS FOR HANDLING LOGGING AND ERRORS
+
+Julia Hancock
+Started 1-7-2026
+
+"""
+
+import logging
+from functools import wraps
+from collections.abc import Callable
+from typing import Any
+
+
+# Function to format logger
+def setup_logger(logger: logging.Logger) -> logging.Logger:
+
+    # Check if logger has handlers, clear if so
+    if logger.hasHandlers():
+        logger.handlers.clear()
+
+    # Add a handler for the console
+    console_handler = logging.StreamHandler()
+    logger.addHandler(console_handler)
+
+    # Create a formatter object
+    formatter = logging.Formatter(
+        "{asctime} - [{name:^8s}][{levelname:^8s}] {message}",
+        style='{',
+        datefmt='%Y-%m-%d %H:%M')
+
+    # Set the console handler's format using the new formatter
+    console_handler.setFormatter(formatter)
+
+    # Set logger level - NOTE: Change before commit if debugging
+    logger.setLevel(logging.INFO)
+
+    return logger
+
+
+# Function that sets up a decorator to log errors while handling them
+def setup_error_logging(logger: logging.Logger) -> \
+        Callable[[Callable[..., Any]], Callable[..., Any]]:
+
+    # Decorator to log and handle errors
+    def error_logging(f: Callable[..., Any]) -> Callable[..., Any]:
+
+        # Define decorated function (wrapper)
+        @wraps(f)
+        def decorated_func(*args: Any, **kwargs: Any) -> Callable[..., Any]:
+
+            # Try to get the function's result
+            try:
+                result = f(*args, **kwargs)
+                return result
+
+            # Log errors if they occur
+            except Exception as e:
+                logger.error(e)
+                raise
+
+        return decorated_func
+
+    return error_logging
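A short sketch of how these two helpers combine in a caller module; the wrapped function and its input are invented for illustration:

```python
import logging

from chromaquant.logging_and_handling import setup_logger, setup_error_logging

# Build a module-level logger and an error-logging decorator from it
logger = setup_logger(logging.getLogger(__name__))
error_logging = setup_error_logging(logger)


@error_logging
def parse_peak_area(raw: str) -> float:
    # Invented example function: raises ValueError on non-numeric input
    return float(raw)


try:
    parse_peak_area("not a number")
except ValueError:
    # The decorator has already logged the error before re-raising it
    pass
```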
chromaquant/match/__init__.py
@@ -0,0 +1,13 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+ChromaQuant.match module initialization
+
+Julia Hancock
+Created 10-19-2024
+
+"""
+
+from .match_config import MatchConfig
+from .match_tools import match_dataframes
+from .match import match
chromaquant/match/match.py
@@ -0,0 +1,184 @@
+#!/usr/bin/env python
+"""
+
+COPYRIGHT STATEMENT:
+
+ChromaQuant – A quantification software for complex gas chromatographic data
+
+Copyright (c) 2026, by Julia Hancock
+Affiliation: Dr. Julie Elaine Rorrer
+URL: https://www.rorrerlab.com/
+
+License: BSD 3-Clause License
+
+---
+
+MATCH FUNCTION FOR DATAFRAME COMPARISONS
+
+Julia Hancock
+Started 1-15-2026
+
+"""
+
+import logging
+from .match_config import MatchConfig
+from .match_tools import match_dataframes
+from ..utils.file_tools import try_open_csv, export_to_csv
+from ..utils.dataframe_processing import column_adjust, \
+    row_filter
+from ..logging_and_handling import setup_logger, setup_error_logging
+
+""" LOGGING AND HANDLING """
+
+# Create a logger
+logger = logging.getLogger(__name__)
+
+# Format the logger
+logger = setup_logger(logger)
+
+# Get an error logging decorator
+error_logging = setup_error_logging(logger)
+
+""" FUNCTION """
+
+
+# Match function
+def match(first_DF,
+          second_DF,
+          match_config=MatchConfig()):
+    """Matches data from two DataFrames
+
+    Parameters
+    ----------
+    first_DF: pandas DataFrame
+        A DataFrame containing data to be matched to data
+        in second_DF, processed, then returned as match_data.
+    second_DF: pandas DataFrame
+        A DataFrame containing data to be matched to data in first_DF.
+    match_config: MatchConfig object
+        A MatchConfig instance containing information on how to match
+        the two data sets.
+
+    Returns
+    -------
+    match_data : pandas DataFrame
+        A DataFrame containing the results from matching.
+
+    """
+
+    """ EVALUATING ARGUMENTS """
+
+    # If the match_config import_include_col list is empty...
+    if not match_config.import_include_col:
+        # Add all columns from the second dataframe
+        match_config.import_include_col = \
+            [column for column in second_DF.columns.tolist()
+             if column not in first_DF.columns.tolist()]
+    # Otherwise, pass
+    else:
+        pass
+
+    """ CREATE OR LOAD MATCH DATAFRAME """
+
+    # Check if a match file already exists at the specified path
+    try_open_tf, match_data = \
+        try_open_csv(match_config.output_path)
+
+    # If a match file already exists,
+    # open it and save it to data object **SEE NOTE
+    if try_open_tf:
+        # NOTE: Commented out potential feature to reopen previous match
+        # file if one exists at the output path, untested
+        # logger.info((f'Opening match file at '
+        #              f'{self.match_config.output_path}'))
+        # self.data[match_key] = match_data
+        # logger.warning('Overwriting previous match data.')
+        pass
+
+    # Otherwise, pass
+    else:
+        # NOTE: See comment in if statement
+        # Create a copy of the locally_defined dataframe
+        # self.data[match_key] = \
+        #     self.data[match_dict.local_data_key].copy()
+        pass
+
+    # Create a copy of the first DF
+    match_data = \
+        first_DF.copy()
+
+    """ FILTER ROWS """
+    # Adjust the dataframe according to match_config
+    # Filter rows first in case desired filter includes a column
+    # that will be renamed or removed
+    match_data = row_filter(
+        match_data,
+        match_config.local_filter_row
+    )
+
+    # Add column headers for columns to include from import
+    match_data = column_adjust(
+        dataframe=match_data,
+        add_col=match_config.import_include_col
+    )
+
+    """ MATCH DATAFRAMES """
+    # Match the local and import data sets
+    match_data = \
+        match_dataframes(match_data,
+                         second_DF,
+                         match_config)
+
+    """ ADJUST OUTPUT """
+
+    # Get the current columns in match_data
+    match_cols = match_data.columns.tolist()
+
+    # If the output_cols_dict is not empty...
+    if match_config.output_cols_dict:
+
+        # Get the keys for output_cols_dict as list of original columns
+        output_cols_keys = list(
+            match_config.output_cols_dict.keys()
+        )
+        # Get the values for output_cols_dict as list of new columns
+        output_cols_values = list(
+            match_config.output_cols_dict.values()
+        )
+
+        # Filter the dataframe according to output_cols
+        # NOTE: This preserves the column order as seen in output_cols_dict
+        # First, add columns present in output_cols but not match_cols
+        # And then rename columns using output_cols_dict
+        columns_to_add = list(
+            set(output_cols_keys).difference(set(match_cols))
+        )
+        match_data = column_adjust(
+            dataframe=match_data,
+            add_col=columns_to_add,
+            rename_dict=match_config.output_cols_dict
+        )
+        # Finally, filter
+        match_data = \
+            match_data[output_cols_values]
+
+    # Otherwise, pass
+    else:
+        pass
+
+    """ (OPTIONAL) EXPORT TO FILE """
+
+    # If the do_export value is True, export to output path
+    if match_config.do_export:
+        export_to_csv(
+            match_data,
+            match_config.output_path
+        )
+        # logger.info('Match results exported to '
+        #             f'{match_config.output_path}')
+
+    # Otherwise, pass
+    else:
+        pass
+
+    return match_data
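A hedged usage sketch of the new `match()` entry point, with invented column names and data; it assumes `match_dataframes` (defined in `match_tools.py`, added in this release but not excerpted here) fills the imported columns for rows that satisfy the match conditions:

```python
import pandas as pd

from chromaquant.match import match, MatchConfig

# Invented example data: chromatogram peaks and a small compound library
peaks = pd.DataFrame({"Compound": ["hexane", "octane"],
                      "RT": [2.1, 4.7]})
library = pd.DataFrame({"Compound": ["hexane", "octane"],
                        "Formula": ["C6H14", "C8H18"]})

# Match rows whose 'Compound' values are equal in both DataFrames
config = MatchConfig()
config.add_match_condition(condition=MatchConfig.IS_EQUAL,
                           comparison="Compound")

# match_data is expected to be a copy of `peaks` extended with the
# columns only present in `library` (here, 'Formula')
match_data = match(peaks, library, match_config=config)
```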
chromaquant/match/match_config.py
@@ -0,0 +1,296 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+COPYRIGHT STATEMENT:
+
+ChromaQuant – A quantification software for complex gas chromatographic data
+
+Copyright (c) 2026, by Julia Hancock
+Affiliation: Dr. Julie Elaine Rorrer
+URL: https://www.rorrerlab.com/
+
+License: BSD 3-Clause License
+
+---
+
+CLASS DEFINITION FOR MATCH CONFIGURATION
+
+Julia Hancock
+Started 1-12-2026
+
+"""
+
+import pandas as pd
+from typing import Any
+from collections.abc import Callable
+
+""" CLASS """
+
+
+# Define ConfigProperty class
+class ConfigProperty():
+
+    # Descriptor __set_name__
+    def __set_name__(self, owner, name):
+        self.name = '_' + name
+
+    # Getter
+    def __get__(self, obj, type=None):
+        return getattr(obj, self.name)
+
+    # Setter
+    def __set__(self, obj, value):
+        setattr(obj, self.name, value)
+
+    # Deleter
+    def __delete__(self, obj):
+        delattr(obj, self.name)
+
+
+# Define MatchConfig class
+class MatchConfig():
+
+    # Create class instances of ConfigProperty for every property
+    do_export = ConfigProperty()
+    import_include_col = ConfigProperty()
+    local_filter_row = ConfigProperty()
+    match_conditions = ConfigProperty()
+    multiple_hits_rule = ConfigProperty()
+    multiple_hits_column = ConfigProperty()
+    output_cols_dict = ConfigProperty()
+    output_path = ConfigProperty()
+
+    # Initialize
+    def __init__(self,
+                 do_export: bool = False,
+                 import_include_col: list[str] | None = None,
+                 local_filter_row:
+                 dict[str, str | bool | float | int] | None = None,
+                 match_conditions: list[dict[str, Any]] | None = None,
+                 multiple_hits_rule:
+                 Callable[[pd.DataFrame, str], pd.Series] | None = None,
+                 multiple_hits_column: str = 'default',
+                 output_cols_dict: dict[str, str] | None = None,
+                 output_path: str = 'match_results.csv'):
+
+        # Expected structure of match_conditions:
+        # self.match_conditions = [{
+        #     'condition': self.IS_EQUAL,
+        #     NOTE: or another condition
+        #     'first_DF_column': {STRING},
+        #     'second_DF_column': {STRING},
+        #     'error': {FLOAT},
+        #     'or_equal': {BOOLEAN}
+        #     },
+        #     ...]
+
+        # Define default match comparison function
+        def default_comp_function(x):
+            return x
+
+        # Set descriptor values
+        self.do_export = do_export
+        self.import_include_col = import_include_col \
+            if import_include_col is not None else []
+        self.local_filter_row = local_filter_row \
+            if local_filter_row is not None else {}
+        self.match_conditions = match_conditions \
+            if match_conditions is not None else []
+        self.multiple_hits_rule = multiple_hits_rule \
+            if multiple_hits_rule is not None else self.SELECT_FIRST_ROW
+        self.multiple_hits_column = multiple_hits_column
+        self.output_cols_dict = output_cols_dict \
+            if output_cols_dict is not None else {}
+        self.output_path = output_path
+
+    """ METHODS """
+
+    # Method to add a new match condition
+    def add_match_condition(self,
+                            condition: Callable[[pd.DataFrame, str],
+                                                pd.Series],
+                            comparison: str | list[str],
+                            error: int | float = 0,
+                            or_equal: bool = False):
+
+        # Check if comparison is a string
+        try:
+            comparison.split()
+            # If can split comparison, assign both first and second
+            # comparison to this one value
+            first_comparison = comparison
+            second_comparison = comparison
+        # If comparison is not a string
+        except Exception:
+            # If the length is one...
+            if len(comparison) == 1:
+                # Set first and second comparison to this one value
+                first_comparison = comparison[0]
+                second_comparison = comparison[0]
+            # If the length is two...
+            elif len(comparison) == 2:
+                # Set the first and second comparison respectively
+                first_comparison = comparison[0]
+                second_comparison = comparison[1]
+            # If the length is neither one nor two, raise an error
+            else:
+                raise ValueError('Unexpected value passed for comparison.')
+
+        # Append new match condition to list of conditions
+        self.match_conditions.append(
+            {
+                'condition': condition,
+                'first_DF_column': first_comparison,
+                'second_DF_column': second_comparison,
+                'error': error,
+                'or_equal': or_equal
+            }
+        )
+
+        return None
+
+    """ STATIC METHODS """
+
+    # Method to get a slice of a DataFrame where one of its
+    # column's values are equal to some value
+    @staticmethod
+    def IS_EQUAL(value: Any,
+                 DF: pd.DataFrame,
+                 DF_column_name: str,
+                 error: float | int = 0,
+                 or_equal: bool = False) -> pd.DataFrame:
+
+        # Get a slice where the comparisons are exactly equal
+        DF_slice = DF.loc[DF[DF_column_name] == value].copy()
+
+        # Try to get a slice where the comparison is
+        # within specified error margins
+        try:
+
+            # Define upper and lower limits
+            series_value_max = value + error
+            series_value_min = value - error
+
+            # Get a slice
+            DF_slice = \
+                DF.loc[(DF[DF_column_name] >= series_value_min) &
+                       (DF[DF_column_name] <= series_value_max)].copy()
+
+        # If an error occurs when trying to get such a slice, pass
+        # NOTE: This is intended to catch cases where comparison
+        # values are non-numbers
+        except Exception:
+            pass
+
+        return DF_slice
+
+    # Method to get a slice of a DataFrame where one of its
+    # column's values are less than some value
+    # (i.e., a value is *greater than* the DataFrame value)
+    @staticmethod
+    def GREATER_THAN(value: Any,
+                     DF: pd.DataFrame,
+                     DF_column_name: str,
+                     error: float | int = 0,
+                     or_equal: bool = False) -> pd.DataFrame:
+
+        # Try to get a slice with condition
+        try:
+
+            # If the values can be equal...
+            if or_equal:
+                # Get a slice
+                DF_slice = \
+                    DF.loc[DF[DF_column_name] <= value].copy()
+
+            # If the values cannot be equal...
+            else:
+                # Get a slice
+                DF_slice = \
+                    DF.loc[DF[DF_column_name] < value].copy()
+
+        # If an error occurs when trying to get such a slice, pass
+        # NOTE: This is intended to catch cases where comparison
+        # values are non-numbers
+        except Exception:
+
+            # Get an empty DataFrame
+            DF_slice = DF.loc[pd.Index([]), :].copy()
+
+        return DF_slice
+
+    # Method to get a slice of a DataFrame where one of its
+    # column's values are greater than some value
+    # (i.e., a value is *less than* the DataFrame value)
+    @staticmethod
+    def LESS_THAN(value: Any,
+                  DF: pd.DataFrame,
+                  DF_column_name: str,
+                  error: float | int = 0,
+                  or_equal: bool = False) -> pd.DataFrame:
+
+        # Try to get a slice with condition
+        try:
+
+            # If the values can be equal...
+            if or_equal:
+                # Get a slice
+                DF_slice = \
+                    DF.loc[DF[DF_column_name] >= value].copy()
+
+            # If the values cannot be equal...
+            else:
+                # Get a slice
+                DF_slice = \
+                    DF.loc[DF[DF_column_name] > value].copy()
+
+        # If an error occurs when trying to get such a slice, pass
+        # NOTE: This is intended to catch cases where comparison
+        # values are non-numbers
+        except Exception:
+
+            # Get an empty DataFrame
+            DF_slice = DF.loc[pd.Index([]), :].copy()
+
+        return DF_slice
+
+    # Method that gets the first row of a slice, used as the default
+    # method of selecting one row of a slice that meets match conditions
+    @staticmethod
+    def SELECT_FIRST_ROW(DF: pd.DataFrame,
+                         column_name: str) -> pd.Series:
+
+        # Get the first row of the DataFrame
+        first_row = DF.loc[DF.index.min()]
+
+        return first_row
+
+    # Method that selects the row with the smallest value in a given column
+    # NOTE: will return the first occurrence of the smallest value if multiple
+    # values share the same minimum
+    @staticmethod
+    def SELECT_LOWEST_VALUE(DF: pd.DataFrame,
+                            column_name: str) -> pd.Series:
+
+        # Get the minimum value
+        min_value_index = DF[column_name].idxmin()
+
+        # Get the row with the smallest value
+        min_value_row = DF.loc[min_value_index]
+
+        return min_value_row
+
+    # Method that selects the row with the largest value in a given column
+    # NOTE: will return the first occurrence of the largest value if multiple
+    # values share the same maximum
+    @staticmethod
+    def SELECT_HIGHEST_VALUE(DF: pd.DataFrame,
+                             column_name: str) -> pd.Series:
+
+        # Get the maximum value
+        max_value_index = DF[column_name].idxmax()
+
+        # Get the row with the largest value
+        max_value_row = DF.loc[max_value_index]
+
+        return max_value_row
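A sketch of configuring a tolerance-based match with `MatchConfig`; the column names are invented, and the assumption that `multiple_hits_rule` is applied to `multiple_hits_column` by `match_dataframes` follows from the constructor signature rather than from code shown in this diff:

```python
from chromaquant.match import MatchConfig

# Resolve multiple hits by keeping the row with the largest value in an
# (assumed) 'Area' column, and export the results once matching runs
config = MatchConfig(do_export=True,
                     output_path="rt_match_results.csv",
                     multiple_hits_rule=MatchConfig.SELECT_HIGHEST_VALUE,
                     multiple_hits_column="Area")

# Treat 'RT (min)' in the first DataFrame and 'RT' in the second as a hit
# when they agree within ±0.05
config.add_match_condition(condition=MatchConfig.IS_EQUAL,
                           comparison=["RT (min)", "RT"],
                           error=0.05)
```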