captest 0.13.3rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,163 @@
1
+ import collections
2
+
3
+
4
class ColumnGroups(collections.UserDict):
    """
    Dictionary of column groups whose entries are also readable as attributes.

    Each key is a group id and each value is a list of column names. Storing
    an item also sets an instance attribute with the same name, so
    ``cg["irr_poa_"]`` and ``cg.irr_poa_`` refer to the same object.

    Keys that cannot safely become attributes (non-string keys, ``"data"``,
    and names already defined on the class such as ``"keys"``) are stored in
    the dictionary only.
    """

    def _can_mirror(self, key):
        """Return True when `key` may be exposed as an instance attribute."""
        # `data` is the dict UserDict keeps its items in, and names defined
        # on the class are the mapping's own methods; overwriting either one
        # on the instance would break the mapping itself.
        return (
            isinstance(key, str)
            and key != "data"
            and not hasattr(type(self), key)
        )

    def __setitem__(self, key, value):
        """Store `value` under `key` and mirror it as an attribute if safe."""
        if self._can_mirror(key):
            setattr(self, key, value)
        super().__setitem__(key, value)

    def __delitem__(self, key):
        """Remove `key` and drop its mirrored attribute, if one was set."""
        super().__delitem__(key)
        if self._can_mirror(key):
            # Keep attribute access in step with the dictionary contents.
            self.__dict__.pop(key, None)

    def __repr__(self):
        """Print `column_groups` dictionary with nice formatting."""
        pieces = []
        for grp_id, col_list in self.data.items():
            pieces.append(str(grp_id) + ":\n")
            # Each column of the group is listed indented below the group id.
            pieces.extend(" " * 4 + str(col) + "\n" for col in col_list)
        return "".join(pieces)
19
+
20
+
21
# Search strings used to classify a column by measurement type. Entries are
# kept in insertion order, and a search string must not be repeated under
# more than one type.
type_defs = collections.OrderedDict()
type_defs["irr"] = [
    "irradiance",
    "irr",
    "plane of array",
    "poa",
    "ghi",
    "global",
    "glob",
    "w/m^2",
    "w/m2",
    "w/m",
    "w/",
]
type_defs["temp"] = [
    "temperature",
    "temp",
    "degrees",
    "deg",
    "ambient",
    "amb",
    "cell temperature",
    "TArray",
]
type_defs["wind"] = ["wind", "speed"]
type_defs["pf"] = ["power factor", "factor", "pf"]
type_defs["op_state"] = ["operating state", "state", "op", "status"]
type_defs["real_pwr"] = ["real power", "ac power", "e_grid"]
type_defs["shade"] = ["fshdbm", "shd", "shade"]
type_defs["pvsyt_losses"] = ["IL Pmax", "IL Pmin", "IL Vmax", "IL Vmin"]
type_defs["index"] = ["index"]
62
+
63
# Search strings used to classify a column by sub-type, kept in insertion
# order.
sub_type_defs = collections.OrderedDict()
sub_type_defs["ghi"] = ["sun2", "global horizontal", "ghi", "global", "GlobHor"]
sub_type_defs["poa"] = ["sun", "plane of array", "poa", "GlobInc"]
sub_type_defs["amb"] = ["TempF", "ambient", "amb"]
sub_type_defs["mod"] = ["Temp1", "module", "mod", "TArray"]
sub_type_defs["mtr"] = ["revenue meter", "rev meter", "billing meter", "meter"]
sub_type_defs["inv"] = ["inverter", "inv"]
73
+
74
# Search strings used to classify a column by irradiance sensor type.
irr_sensors_defs = dict(
    ref_cell=["reference cell", "reference", "ref", "referance", "pvel"],
    pyran=["pyranometer", "pyran"],
    clear_sky=["csky"],
)
79
+
80
+
81
def series_type(series, type_defs):
    """
    Assign columns to a category by analyzing the column names.

    The type_defs parameter is a dictionary which defines search strings
    for each key, where the key is a categorical name
    and the search strings are possible related names. For example an
    irradiance sensor has the key 'irr' with search strings 'irradiance'
    'plane of array', 'poa', etc.

    Matching is a case-insensitive substring test against the series name.
    Keys are tried in the iteration order of `type_defs` and the first key
    with a matching search string wins.

    Parameters
    ----------
    series : pandas series
        Row or column of dataframe passed by pandas.df.apply. Only its
        `name` attribute is read.
    type_defs : dictionary
        Dictionary with the following structure. See type_defs
        {'category abbreviation': [category search strings]}

    Returns
    -------
    string
        Returns a string representing the category for the series, or an
        empty string when the series has no name or nothing matches.
    """
    if series.name is None:
        # An unnamed series has nothing to match against.
        return ""
    # Names are not always strings (e.g. integer column labels), so convert
    # before lowering; lower once here rather than once per search string.
    name = str(series.name).lower()
    for key, search_strings in type_defs.items():
        if any(search_str.lower() in name for search_str in search_strings):
            return key
    return ""
114
+
115
+
116
def group_columns(data):
    """
    Create a dict of raw column names paired to categorical column names.

    Uses multiple type_def formatted dictionaries to determine the type,
    sub-type, and equipment type for data series of a dataframe. The
    determined types are concatenated to a string used as a dictionary key
    with a list of one or more original column names as the paired value.

    Groups are added in sorted order of their id, and the column names
    within each group are sorted, so the result is the same on every run.

    Parameters
    ----------
    data : DataFrame
        Data with columns to group.

    Returns
    -------
    cg : ColumnGroups
        Maps each group id, formed as '<type>_<sub type>_<sensor type>' with
        an empty string for any part that did not match, to the list of
        original column names assigned to it.

    Todo
    ----
    type_defs parameter
        Consider refactoring to have a list of type_def dictionaries as an
        input and loop over each dict in the list.
    """
    all_defs = (type_defs, sub_type_defs, irr_sensors_defs)

    # Iterate over the columns directly instead of using DataFrame.apply:
    # apply does not return a Series of results for a frame with no rows.
    name_pairs = []
    for col_name, series in data.items():
        group_id = "_".join(series_type(series, defs) for defs in all_defs)
        name_pairs.append((group_id, col_name))

    # Sorting the (group id, column name) pairs orders the groups and the
    # columns inside each group in a single pass.
    name_pairs.sort()

    trans = {}
    for group_id, col_name in name_pairs:
        trans.setdefault(group_id, []).append(col_name)

    return ColumnGroups(trans)