pyaerial 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
aerial/__init__.py ADDED
@@ -0,0 +1,29 @@
1
+ import logging
2
+ from . import discretization, rule_quality, model
3
+ from aerial.rule_extraction import generate_rules, generate_frequent_itemsets
4
+
5
# Names exported by ``from aerial import *``. The entries must be strings naming
# attributes of this package; listing the objects themselves makes the star-import fail.
__all__ = ["discretization", "rule_quality", "model", "generate_rules", "generate_frequent_itemsets"]

# Create a package-wide logger. The NullHandler keeps the package silent until the
# application (or setup_logging) installs a real handler.
logger = logging.getLogger("aerial")
logger.propagate = True
logger.addHandler(logging.NullHandler())
11
+
12
+
13
def setup_logging(level=logging.INFO, propagate=True):
    """
    Reconfigure the package-wide logger.

    Every handler currently attached to the package logger is detached. Unless ``level`` is
    ``logging.NOTSET``, a single console handler with a timestamped format is attached afterwards.

    :param level: logging level applied to the package logger
    :param propagate: value assigned to the package logger's ``propagate`` flag
    """
    logger.setLevel(level)
    logger.propagate = propagate

    # Iterate over a snapshot: removeHandler mutates logger.handlers while we walk it
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)

    # NOTSET means "no console output requested": leave the logger without handlers
    if level == logging.NOTSET:
        return

    console = logging.StreamHandler()
    console.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
    logger.addHandler(console)
Binary file
Binary file
@@ -0,0 +1,74 @@
1
+ """
2
+ Copyright (c) [2025] [Erkan Karabulut - DiTEC Project]
3
+
4
+ This script implements data preparation functions for tabular data used in association rule mining with Aerial
5
+ """
6
+
7
+ import concurrent
8
+ import numpy as np
9
+ import pandas as pd
10
+
11
+ from concurrent.futures import ThreadPoolExecutor
12
+
13
+ from aerial.table import get_unique_values_per_column
14
+
15
+
16
def _one_hot_encoding_with_feature_tracking(transactions: pd.DataFrame, parallel_workers=1):
    """
    One-hot encode a table for training the Autoencoder and record which encoded positions belong to
    which column, so that rules can later be traced back to feature values.

    Every encoded position is named ``{COL_NAME}__{value}``. To keep that separator unambiguous, any "__"
    inside a column name is replaced with "--" first.

    NOTE: the renaming is assigned to ``transactions.columns``, i.e. the caller's DataFrame is relabelled
    in place. Column labels are converted to strings while doing so, which also makes tables with
    non-string labels (e.g. the default integer labels) usable.

    :param transactions: pandas DataFrame of transactions; cells for which ``pd.isna`` is true are left
        unmarked in the encoding
    :param parallel_workers: number of threads used to encode the rows
    :return: a tuple ``(vector_list, feature_value_indices)`` where
        vector_list: DataFrame of 0/1 integers with one row per transaction and one column per
            ``{COL_NAME}__{value}`` entry,
        feature_value_indices: list of ``{'feature': name, 'start': i, 'end': j}`` dicts giving the
            half-open position range ``[i, j)`` occupied by the values of each column
    """
    # Aerial uses "__" to separate column names and their values when one-hot encoding, {COL_NAME}__{value}
    # therefore, replace all "__" in column names with "--" to avoid later confusion in naming.
    # str(col) keeps this from failing on non-string column labels.
    transactions.columns = [str(col).replace('__', '--') for col in transactions.columns]
    columns = transactions.columns.tolist()

    # unique_values is used as a mapping {column: unique values}; value_count is used as the total
    # width of the one-hot vector
    unique_values, value_count = get_unique_values_per_column(transactions)
    feature_value_indices = []
    vector_tracker = []
    start = 0

    # Track what each position in the input vector corresponds to, and where the values of each
    # feature start and end
    for feature, values in unique_values.items():
        end = start + len(values)
        feature_value_indices.append({'feature': feature, 'start': start, 'end': end})
        vector_tracker.extend([f"{feature}__{value}" for value in values])
        start = end

    # Map tracker entries to indices for fast lookup
    tracker_index_map = {key: idx for idx, key in enumerate(vector_tracker)}

    # Preallocate the encoded matrix
    vector_list = np.zeros((len(transactions), value_count), dtype=int)

    def process_transaction(transaction_idx, transaction):
        """Encode a single row; returns the row position together with its one-hot vector."""
        transaction_vector = np.zeros(value_count, dtype=int)
        for col_idx, value in enumerate(transaction):
            if not pd.isna(value):
                key = f"{columns[col_idx]}__{value}"
                transaction_vector[tracker_index_map[key]] = 1
        return transaction_idx, transaction_vector

    # Parallelize transaction processing
    # NOTE: Preparing the input data for each of the algorithms is not included in the execution time calculation
    # Therefore, we preprocess data in parallel where possible for each of the algorithm
    with concurrent.futures.ThreadPoolExecutor(max_workers=parallel_workers) as executor:
        futures = [
            executor.submit(process_transaction, transaction_idx, transaction)
            for transaction_idx, transaction in enumerate(transactions.itertuples(index=False))
        ]

        # Futures finish in arbitrary order; the returned row position puts each vector in place
        for future in concurrent.futures.as_completed(futures):
            transaction_idx, transaction_vector = future.result()
            vector_list[transaction_idx] = transaction_vector

    vector_list = pd.DataFrame(vector_list, columns=vector_tracker)
    return vector_list, feature_value_indices
@@ -0,0 +1,56 @@
1
+ """
2
+ Copyright (c) [2025] [Erkan Karabulut - DiTEC Project]
3
+
4
+ This script includes different discretization methods for tabular data
5
+ """
6
+ import logging
7
+
8
+ import numpy as np
9
+ import pandas as pd
10
+
11
+ logger = logging.getLogger("aerial")
12
+
13
+
14
def equal_frequency_discretization(df: pd.DataFrame, n_bins=10):
    """
    Detect numerical columns automatically and discretize them into n_bins intervals based on equal frequency.
    Intervals are represented as strings. Missing values stay missing (they are not turned into the
    literal string "nan"). Columns for which binning raises ``ValueError`` are left unchanged.

    :param df: tabular data in pandas DataFrame form (a copy is made; df itself is not modified)
    :param n_bins: number of intervals (bins); duplicate bin edges are dropped, so fewer may be produced
    :return: copy of df with discretized numerical columns
    """
    df_discretized = df.copy()
    num_cols = df.select_dtypes(include=[np.number]).columns

    for col in num_cols:
        try:
            binned = pd.qcut(df[col], q=n_bins, duplicates='drop')
        except ValueError:
            logger.debug("Column '%s' could not be discretized due to insufficient unique values.", col)
            continue

        # astype(str) alone would render missing cells as the string "nan", which downstream code
        # could no longer recognise as missing; where() restores them as real missing values.
        df_discretized[col] = binned.astype(str).where(binned.notna())

    return df_discretized
35
+
36
+
37
def equal_width_discretization(df: pd.DataFrame, n_bins=10):
    """
    Detect numerical columns automatically and discretize them into n_bins intervals based on equal width.
    Intervals are represented as strings. Missing values stay missing (they are not turned into the
    literal string "nan"). Columns for which binning raises ``ValueError`` are left unchanged.

    :param df: tabular data in pandas DataFrame form (a copy is made; df itself is not modified)
    :param n_bins: number of intervals (bins)
    :return: copy of df with discretized numerical columns
    """
    df_discretized = df.copy()
    num_cols = df.select_dtypes(include=[np.number]).columns

    for col in num_cols:
        try:
            binned = pd.cut(df[col], bins=n_bins)
        except ValueError:
            logger.debug("Column '%s' could not be discretized due to insufficient unique values.", col)
            continue

        # astype(str) alone would render missing cells as the string "nan", which downstream code
        # could no longer recognise as missing; where() restores them as real missing values.
        df_discretized[col] = binned.astype(str).where(binned.notna())

    return df_discretized
aerial/model.py ADDED
@@ -0,0 +1,179 @@
1
+ """
2
+ Copyright (c) [2025] [Erkan Karabulut - DiTEC Project]
3
+
4
+ Construct an Autoencoder for association rule mining as described in the paper (Neurosymbolic association rule mining
5
+ from tabular data - https://arxiv.org/abs/2504.19354)
6
+ """
7
+
8
+ import os
9
+ import logging
10
+ import torch
11
+ import pandas as pd
12
+ from torch import nn
13
+ import math
14
+ import torch.nn.functional as F
15
+
16
+ from torch.utils.data import TensorDataset, DataLoader
17
+
18
+ from aerial.data_preparation import _one_hot_encoding_with_feature_tracking
19
+
20
+ logger = logging.getLogger("aerial")
21
+
22
+
23
class AutoEncoder(nn.Module):
    """
    This autoencoder is used to create a neural representation of tabular data for association rule mining.

    The encoder is a stack of Linear layers with Tanh between them (no activation after the last one);
    the decoder mirrors the encoder. ``forward`` applies a softmax per feature range to the decoder output.
    """

    def __init__(self, input_dimension, feature_count, layer_dims: list = None):
        """
        The init function can either construct an under-complete Autoencoder based on the input dimension and feature
        count, automatically deciding the number of layers and layer dimensions.
        Or, if specified by the user, it can also use the layer counts and dimensions from the user.
        Note that fine-tuning layer count and dimensions based on your table dimension and size
        will result in better performance in general

        :param input_dimension: number of features after one-hot encoding (input dimension)
        :param feature_count: target feature count (initial column count of tabular data)
        :param layer_dims: (optional) sequence of int, specific dimensions for hidden layers
            (excluding the input dimension; the last entry is the size of the encoder output)
        """
        super().__init__()

        self.input_dimension = input_dimension
        self.feature_count = feature_count
        # Bookkeeping attributes; they start empty and are assigned from outside the class
        self.input_vectors = None
        self.feature_value_indices = None
        self.feature_values = None

        if layer_dims is None:
            # Default number of layers grows with log base 16 of the input dimension
            layer_count = max(1, math.ceil(math.log(input_dimension, 16)) - 1)

            # Shrink by a constant ratio per layer so the last layer lands on feature_count
            reduction_ratio = (feature_count / input_dimension) ** (1 / layer_count)
            dimensions = [input_dimension]
            for _ in range(1, layer_count):
                dimensions.append(max(feature_count, int(dimensions[-1] * reduction_ratio)))
            dimensions.append(feature_count)
        else:
            # list(...) so that tuples and other sequences are accepted, not only lists
            dimensions = [input_dimension] + list(layer_dims)

        self.dimensions = dimensions  # save for inspection

        # Decoder is the mirror image of the encoder
        self.encoder = self._build_stack(dimensions)
        self.decoder = self._build_stack(dimensions[::-1])

        self.encoder.apply(self.init_weights)
        self.decoder.apply(self.init_weights)

    @staticmethod
    def _build_stack(dimensions):
        """Build Linear layers for consecutive dimension pairs, with Tanh after all but the last layer."""
        layers = []
        last_layer = len(dimensions) - 2
        for i, (in_dim, out_dim) in enumerate(zip(dimensions, dimensions[1:])):
            layers.append(nn.Linear(in_dim, out_dim))
            if i != last_layer:
                layers.append(nn.Tanh())
        return nn.Sequential(*layers)

    @staticmethod
    def init_weights(m):
        """
        all weights are initialized with values sampled from uniform distributions with the Xavier initialization
        and the biases are set to 0, as described in the paper by Delong et al. (2023)
        """
        if isinstance(m, nn.Linear):
            torch.nn.init.xavier_uniform_(m.weight)
            m.bias.data.zero_()

    def save(self, name):
        """
        Write the encoder and decoder weights to ``{name}_encoder.pt`` and ``{name}_decoder.pt``.

        :param name: path prefix of the two files
        """
        torch.save(self.encoder.state_dict(), name + "_encoder.pt")
        torch.save(self.decoder.state_dict(), name + '_decoder.pt')

    def load(self, name):
        """
        Load encoder and decoder weights saved by ``save`` and switch both to evaluation mode.

        :param name: path prefix of the two files
        :return: True if both files exist and were loaded, False if at least one file is missing
        """
        encoder_path = name + '_encoder.pt'
        decoder_path = name + '_decoder.pt'
        if not (os.path.isfile(encoder_path) and os.path.isfile(decoder_path)):
            return False

        # weights_only=True restricts unpickling to tensors and plain containers, which is all a
        # state_dict needs, and avoids executing arbitrary pickled code from the file.
        self.encoder.load_state_dict(torch.load(encoder_path, weights_only=True))
        self.decoder.load_state_dict(torch.load(decoder_path, weights_only=True))
        self.encoder.eval()
        self.decoder.eval()
        return True

    def forward(self, x, feature_value_indices):
        """
        Reconstruct ``x`` and normalise the reconstruction per feature.

        :param x: 2-D batch of input vectors (rows are samples; columns are sliced by the given ranges)
        :param feature_value_indices: iterable of ``(start, end)`` pairs, one half-open column range per feature
        :return: tensor made of the softmax of each range, concatenated in the given order; columns not
            covered by any range are not part of the output
        """
        y = self.encoder(x)
        y = self.decoder(y)

        # Softmax every feature range independently, then stitch the ranges back together
        softmax_chunks = [F.softmax(y[:, start:end], dim=1) for start, end in feature_value_indices]
        return torch.cat(softmax_chunks, dim=1)
127
+
128
+
129
def train(transactions: pd.DataFrame, autoencoder: AutoEncoder = None, noise_factor=0.5,
          lr=5e-3, epochs=1, batch_size=2, loss_function=torch.nn.BCELoss(), num_workers=1, layer_dims: list = None):
    """
    Train an autoencoder for association rule mining.

    The table is one-hot encoded, Gaussian noise is added to every batch (clamped back to [0, 1]), and the
    model is trained to reconstruct the clean batch. The loss is computed per feature range and summed.

    NOTE: the one-hot encoding step relabels ``transactions.columns`` in place ("__" becomes "--").

    :param transactions: tabular data as a pandas DataFrame
    :param autoencoder: (optional) model to train; when None a new AutoEncoder is built from the encoded table
    :param noise_factor: multiplier of the standard-normal noise added to each input batch
    :param lr: learning rate of the Adam optimizer
    :param epochs: number of passes over the data
    :param batch_size: number of rows per batch
    :param loss_function: callable ``loss(reconstruction, target)`` applied to every feature range
    :param num_workers: worker count used both for the one-hot encoding and for the DataLoader
    :param layer_dims: (optional) hidden layer dimensions forwarded to AutoEncoder when a new one is built
    :return: the ``torch.compile``-d model, with ``input_vectors`` (encoded DataFrame),
        ``feature_value_indices`` (list of feature range dicts) and ``feature_values`` (encoded column
        names) attached
    """
    input_vectors, feature_value_indices = _one_hot_encoding_with_feature_tracking(transactions, num_workers)
    columns = input_vectors.columns.tolist()

    # Explicit None check: "no model given" should not depend on the truthiness of a module object
    if autoencoder is None:
        autoencoder = AutoEncoder(input_dimension=len(columns), feature_count=len(feature_value_indices),
                                  layer_dims=layer_dims)

    autoencoder.input_vectors = input_vectors

    input_vectors = input_vectors.to_numpy()

    autoencoder = torch.compile(autoencoder)
    autoencoder.feature_value_indices = feature_value_indices
    autoencoder.feature_values = columns

    optimizer = torch.optim.Adam(autoencoder.parameters(), lr=lr, weight_decay=2e-8)

    vectors_tensor = torch.tensor(input_vectors, dtype=torch.float32)
    dataset = TensorDataset(vectors_tensor)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

    # (start, end) column range of every feature; used for the per-feature softmax and loss
    softmax_ranges = [(cat['start'], cat['end']) for cat in feature_value_indices]

    for epoch in range(epochs):
        logger.debug("Epoch %d/%d", epoch + 1, epochs)
        for (batch,) in dataloader:
            # Denoising objective: corrupt the input, reconstruct the clean batch
            noisy_batch = (batch + torch.randn_like(batch) * noise_factor).clamp(0, 1)

            # Forward pass
            reconstructed_batch = autoencoder(noisy_batch, softmax_ranges)

            # Sum of the per-feature losses over the whole batch
            total_loss = sum(
                loss_function(
                    reconstructed_batch[:, start:end],
                    batch[:, start:end]
                )
                for (start, end) in softmax_ranges
            )

            # Backpropagation and optimization step
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

    return autoencoder
@@ -0,0 +1,243 @@
1
+ """
2
+ Copyright (c) [2025] [Erkan Karabulut - DiTEC Project]
3
+
4
+ Includes the Aerial algorithm's source code for association rule (and frequent itemsets) extraction from a
5
+ trained Autoencoder (Neurosymbolic association rule mining from tabular data - https://arxiv.org/abs/2504.19354)
6
+ """
7
+
8
+ import torch
9
+
10
+ from itertools import combinations
11
+
12
+ from aerial.model import AutoEncoder
13
+ import numpy as np
14
+ import logging
15
+
16
+ logger = logging.getLogger("aerial")
17
+
18
+
19
def generate_rules(autoencoder: AutoEncoder, ant_similarity=0.5, cons_similarity=0.8, max_antecedents=2,
                   target_class=None):
    """
    Extract association rules from a trained Autoencoder using the Aerial+ algorithm.

    Test vectors are built in which the candidate antecedent values are marked with 1 and all other
    features carry equal probabilities. The model's reconstruction of a test vector decides whether the
    candidate is kept and which values follow from it.

    :param autoencoder: trained Autoencoder carrying ``feature_value_indices`` and ``feature_values``
    :param ant_similarity: antecedent threshold; a candidate is dropped as soon as the reconstructed
        probability of one of its marked values is less than or equal to this value
    :param cons_similarity: consequent threshold; an unmarked value becomes a consequent when its
        reconstructed probability is greater than or equal to this value
    :param max_antecedents: max number of antecedents that the rules will contain
    :param target_class: if given a target class (feature name), generate rules with the values of that
        feature on the right hand side only; if the name matches no feature, all features stay eligible
    :return: list of ``{'antecedents': [feature values], 'consequent': feature value}`` dicts, or None
        when no autoencoder is given
    """
    if not autoencoder:
        logger.error("A trained Autoencoder has to be provided before generating rules.")
        return None

    logger.debug("Extracting association rules from the given trained Autoencoder ...")

    association_rules = []
    input_vector_size = autoencoder.encoder[0].in_features

    feature_value_indices = autoencoder.feature_value_indices
    target_range = range(input_vector_size)

    # If target_class is specified, narrow the target range and features
    # this is to do "constraint-based rule mining"
    if target_class:
        for feature in feature_value_indices:
            if feature["feature"] == target_class:
                target_range = range(feature["start"], feature["end"])
                break
        else:
            logger.warning("Target class '%s' does not match any feature; consequents are not restricted.",
                           target_class)

    low_support_antecedents = np.array([])

    # Initialize input vectors
    unmarked_features = _initialize_input_vectors(input_vector_size, feature_value_indices)

    # Precompute target indices for softmax to speed things up
    feature_value_indices = [(cat['start'], cat['end']) for cat in feature_value_indices]
    softmax_ranges = feature_value_indices

    for r in range(1, max_antecedents + 1):
        if r == 2:
            # After the single-antecedent pass, drop features whose every value turned out low-support
            softmax_ranges = [
                (start, end) for (start, end) in softmax_ranges
                if not all(idx in low_support_antecedents for idx in range(start, end))
            ]

        feature_combinations = list(combinations(softmax_ranges, r))  # Generate combinations

        # Collect the test vectors of all combinations so the model is evaluated once per r
        batch_vectors = []
        batch_candidate_antecedent_list = []

        for category_list in feature_combinations:
            test_vectors, candidate_antecedent_list = _mark_features(unmarked_features, list(category_list),
                                                                     low_support_antecedents)
            if len(test_vectors) > 0:
                batch_vectors.extend(test_vectors)
                batch_candidate_antecedent_list.extend(candidate_antecedent_list)

        if batch_vectors:
            batch_vectors = torch.tensor(np.array(batch_vectors), dtype=torch.float32)
            # Perform a single model evaluation for the batch; gradients are not needed for
            # rule extraction, so skip building the autograd graph
            with torch.no_grad():
                implications_batch = autoencoder(batch_vectors, feature_value_indices).detach().numpy()

            for implication_probabilities, candidate_antecedents \
                    in zip(implications_batch, batch_candidate_antecedent_list):
                if len(candidate_antecedents) == 0:
                    continue

                # Identify low-support antecedents
                if any(implication_probabilities[ant] <= ant_similarity for ant in candidate_antecedents):
                    if r == 1:
                        low_support_antecedents = np.append(low_support_antecedents, candidate_antecedents)
                    continue

                # Identify high-support consequents
                consequent_list = [
                    prob_index for prob_index in target_range
                    if prob_index not in candidate_antecedents and
                    implication_probabilities[prob_index] >= cons_similarity
                ]

                if consequent_list:
                    new_rule = _get_rule(candidate_antecedents, consequent_list, autoencoder.feature_values)
                    # One rule per consequent, all sharing the same antecedent list
                    for consequent in new_rule['consequents']:
                        association_rules.append({'antecedents': new_rule['antecedents'], 'consequent': consequent})

    logger.debug("%d association rules extracted.", len(association_rules))
    return association_rules
107
+
108
+
109
def generate_frequent_itemsets(autoencoder: AutoEncoder, similarity=0.5, max_length=2):
    """
    Generate frequent itemsets using the Aerial+ algorithm.

    Test vectors are built in which the candidate items are marked with 1 and all other features carry
    equal probabilities. A candidate is reported when the model reconstructs every marked item with a
    probability above the threshold.

    :param autoencoder: trained Autoencoder carrying ``feature_value_indices`` and ``feature_values``
    :param similarity: threshold; a candidate is dropped as soon as the reconstructed probability of one
        of its items is less than or equal to this value
    :param max_length: max number of items per itemset
    :return: list of itemsets, each a list of feature values, or None when no autoencoder is given
    """
    if not autoencoder:
        logger.error("A trained Autoencoder has to be provided before extracting frequent items.")
        return None

    logger.debug("Extracting frequent items from the given trained Autoencoder ...")

    frequent_itemsets = []
    input_vector_size = len(autoencoder.feature_values)

    low_support_antecedents = np.array([])

    feature_value_indices = autoencoder.feature_value_indices

    # Initialize input vectors once
    unmarked_features = _initialize_input_vectors(input_vector_size, feature_value_indices)

    # Precompute target indices for softmax
    feature_value_indices = [(cat['start'], cat['end']) for cat in feature_value_indices]
    softmax_ranges = feature_value_indices

    # Iteratively process combinations of increasing size
    for r in range(1, max_length + 1):
        # Drop features whose every value has already been found low-support
        softmax_ranges = [
            (start, end) for (start, end) in softmax_ranges
            if not all(idx in low_support_antecedents for idx in range(start, end))
        ]

        feature_combinations = list(combinations(softmax_ranges, r))  # Generate combinations

        # Collect the test vectors of all combinations so the model is evaluated once per r
        batch_vectors = []
        batch_candidate_antecedent_list = []

        for category_list in feature_combinations:
            test_vectors, candidate_antecedent_list = _mark_features(unmarked_features, list(category_list),
                                                                     low_support_antecedents)
            if len(test_vectors) > 0:
                batch_vectors.extend(test_vectors)
                batch_candidate_antecedent_list.extend(candidate_antecedent_list)

        if batch_vectors:
            batch_vectors = torch.tensor(np.array(batch_vectors), dtype=torch.float32)
            # Perform a single model evaluation for the batch; gradients are not needed for
            # itemset extraction, so skip building the autograd graph
            with torch.no_grad():
                implications_batch = autoencoder(batch_vectors, feature_value_indices).detach().numpy()

            for implication_probabilities, candidate_antecedents \
                    in zip(implications_batch, batch_candidate_antecedent_list):
                if len(candidate_antecedents) == 0:
                    continue

                # Identify low-support antecedents
                if any(implication_probabilities[ant] <= similarity for ant in candidate_antecedents):
                    if r == 1:
                        low_support_antecedents = np.append(low_support_antecedents, candidate_antecedents)
                    continue

                # Add to frequent itemsets
                frequent_itemsets.append(
                    [autoencoder.feature_values[idx] for idx in candidate_antecedents]
                )

    logger.debug("%d frequent itemsets extracted.", len(frequent_itemsets))
    return frequent_itemsets
174
+
175
+
176
+ def _mark_features(unmarked_test_vector, features, low_support_antecedents):
177
+ """
178
+ Create a list of test vectors by marking the given features in the unmarked test vector.
179
+ This optimized version processes features in bulk using NumPy operations.
180
+ """
181
+ input_vector_size = unmarked_test_vector.shape[0]
182
+
183
+ # Compute valid feature ranges excluding low_support_antecedents
184
+ feature_ranges = [
185
+ np.setdiff1d(np.arange(start, end), low_support_antecedents)
186
+ for (start, end) in features
187
+ ]
188
+
189
+ # Create all combinations of feature indices
190
+ combinations = np.array(np.meshgrid(*feature_ranges)).T.reshape(-1, len(features))
191
+
192
+ # Initialize test_vectors and candidate_antecedents
193
+ n_combinations = combinations.shape[0]
194
+ test_vectors = np.tile(unmarked_test_vector, (n_combinations, 1))
195
+ candidate_antecedents = [[] for _ in range(n_combinations)]
196
+
197
+ # Vectorized marking of test_vectors
198
+ for i, (start, end) in enumerate(features):
199
+ # Get the feature range
200
+ valid_indices = combinations[:, i]
201
+
202
+ # Ensure indices are within bounds
203
+ valid_indices = valid_indices[(valid_indices >= 0) & (valid_indices < input_vector_size)]
204
+
205
+ # Mark test_vectors based on valid indices for the current feature
206
+ for j, idx in enumerate(valid_indices):
207
+ test_vectors[j, start:end] = 0 # Set feature range to 0
208
+ test_vectors[j, idx] = 1 # Mark the valid index with 1
209
+ candidate_antecedents[j].append(idx) # Append the index to the j-th test vector's antecedents
210
+
211
+ # Convert lists of candidate_antecedents to numpy arrays
212
+ candidate_antecedents = [np.array(lst) for lst in candidate_antecedents]
213
+ return test_vectors, candidate_antecedents
214
+
215
+
216
+ def _initialize_input_vectors(input_vector_size, categories):
217
+ """
218
+ Initialize the input vectors with equal probabilities for each feature range.
219
+ """
220
+ vector_with_unmarked_features = np.zeros(input_vector_size)
221
+ for category in categories:
222
+ vector_with_unmarked_features[category['start']:category['end']] = 1 / (
223
+ category['end'] - category['start'])
224
+ return vector_with_unmarked_features
225
+
226
+
227
+ def _get_rule(antecedents, consequents, feature_values):
228
+ """
229
+ Find the corresponding feature value for the given antecedents and consequent that are indices in test vectors
230
+ :param antecedents: a list of indices in the test vectors marking the antecedent locations
231
+ :param consequents: an index in the test vector marking the consequent location
232
+ :param feature_values: a list of string that keeps track of which neuron in the Autoencoder input corresponds
233
+ to which feature value in the tabular data
234
+ :return:
235
+ """
236
+ rule = {'antecedents': [], 'consequents': []}
237
+ for antecedent in antecedents:
238
+ rule['antecedents'].append(feature_values[antecedent])
239
+
240
+ for consequent in consequents:
241
+ rule['consequents'].append(feature_values[consequent])
242
+
243
+ return rule