atspm 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
atspm/Aggregations.py ADDED
@@ -0,0 +1,326 @@
1
+
2
+ import pandas as pd
3
+ import duckdb
4
+ import os
5
+ #import importlib.resources as pkg_resources
6
+ #with pkg_resources.open_text('ATSPM_Aggregation', 'queries.sql') as f:
7
+ # sql_query = f.read()
8
+
9
+
10
class Utils:
    """Helper functions to be shared across classes."""

    def query_mssql(self, query, server, database):
        """Run a query in MS SQL Server and return the result as a DataFrame.

        The connection uses the ``mssql+pyodbc`` dialect with a trusted
        connection and the ``SQL Server`` driver. ``SET NOCOUNT ON; `` is
        prepended to the query before it is sent.

        Args:
            query: SQL text to execute.
            server: SQL Server host/instance name placed in the connection URL.
            database: Database name placed in the connection URL.

        Returns:
            pandas.DataFrame holding the result of ``pd.read_sql_query``.
        """
        # Imported here so these libraries are only needed when this
        # optional function is actually used.
        from sqlalchemy import create_engine
        import warnings

        query = "SET NOCOUNT ON; " + query
        connection_string = f"mssql+pyodbc://@{server}/{database}?trusted_connection=yes&driver=SQL+Server"
        engine = create_engine(connection_string)
        try:
            conn = engine.raw_connection()  # Uses DBAPI
            try:
                # Suppress the warning from pandas that it is only tested on
                # sqlalchemy connectables; the raw DBAPI connection is kept
                # because the original author found it much faster.
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    return pd.read_sql_query(query, conn)
            finally:
                # Always release the connection, even when the query fails.
                conn.close()
        finally:
            engine.dispose()
28
+
29
+ # Additional libraries are imported inside of optional functions: query_mssql & plot_occupancy
30
+ class Aggregations(Utils):
31
def __init__(self, phase_detector_config, data=None, mssql_server=None, mssql_database=None, duckdb_threads=None):
    """Create the in-memory DuckDB connection and load configs and SQL templates.

    Args:
        phase_detector_config: DataFrame with (at least) the columns
            ``Function``, ``Phase``, ``Parameter`` and ``DeviceId``. Rows are
            selected per measure by their ``Function`` value.
        data: Optional pandas DataFrame of hi-res events. It is cast,
            de-duplicated, filtered to the supported event codes and
            registered in DuckDB as ``raw_data``. Problems are printed, not
            raised.
        mssql_server: Server name stored for later use by ``get_mssql_data``.
        mssql_database: Database name stored for later use by ``get_mssql_data``.
        duckdb_threads: Optional int limiting the number of DuckDB threads.
    """
    # Connect to DuckDB (private in-memory database for this instance)
    self.duck_con = duckdb.connect(database=':memory:', read_only=False)

    # Hi-res Event Codes to Include When Loading Data FOR DETECTOR-BASED ATSPMs
    self.event_codes = '1,8,10,81,82'
    # Hi-res Event Codes to Include When Loading Data FOR GENERIC ATSPMs
    self.event_codes_generic = '4,5,6'

    # Load data if provided, ensuring proper format.
    # Deliberately best-effort: a malformed frame is reported, not raised.
    try:
        if data is not None:
            # Set data types
            data = data.astype({'DeviceId': 'uint16', 'EventId': 'uint8', 'Parameter': 'uint8'})
            # `data` is resolved by DuckDB from the local Python variable.
            data = duckdb.query(f'SELECT DISTINCT * FROM data WHERE EventId IN({self.event_codes},{self.event_codes_generic})').fetchdf()
            self.duck_con.register('raw_data', data)
    except Exception as e:
        print(e)
        print('Data must be a pandas dataframe with columns: DeviceId, EventId, Parameter, Timestamp')

    # Option to limit CPU use if needed.
    # The limit must be set on self.duck_con, which runs every aggregation;
    # previously it was only set on DuckDB's separate default connection.
    if isinstance(duckdb_threads, int) and not isinstance(duckdb_threads, bool):
        self.duck_con.execute(f"SET threads TO {duckdb_threads}")
        # Kept for backward compatibility with the module-level connection.
        duckdb.query(f"SET threads to {duckdb_threads}")

    # Define phase-detector configurations dictionary.
    # For each measure: '<measure>_config' is the configuration dataframe,
    # '<measure>_devices' is the set of DeviceIds it covers.
    self.configs = dict()
    measures = (('split_fail', 'Presence'), ('yellow_red', 'Yellow_Red'), ('arrival_on_green', 'Advance'))
    for measure, detection in measures:
        try:
            config = phase_detector_config[phase_detector_config.Function == detection][['Phase', 'Parameter', 'DeviceId']]
            self.configs[f'{measure}_config'] = config
            self.configs[f'{measure}_devices'] = set(config.DeviceId)
            # Explicit check instead of `assert`, which is stripped under -O.
            if len(self.configs[f'{measure}_devices']) == 0:
                raise ValueError(f"No detectors configured with Function == '{detection}'")
        except Exception as e:
            print(f'{measure} Detection Not Found!')
            print(e)

    self.mssql_server = mssql_server
    self.mssql_database = mssql_database

    # queries.sql lives next to this file
    queries_file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'queries.sql')
    # Load SQL queries into a dictionary keyed by query name
    with open(queries_file_path, 'r') as file:
        content = file.read()
    self.queries_dict = {}
    # Queries are split on ';'. The first line of each chunk is its name
    # (leading/trailing '-' and spaces removed); the rest is the SQL.
    for chunk in content.split(';'):
        chunk = chunk.strip()
        if chunk == '':  # Ignore empty chunks
            continue
        lines = chunk.split('\n')
        name = lines[0].strip('- ').strip()
        self.queries_dict[name] = '\n'.join(lines[1:]).strip()
89
+
90
+ # Get raw event data from SQL Server
91
def get_mssql_data(self, start, end, event_codes, filtered_devices=None):
    """Get raw event data from SQL Server and register it in DuckDB as ``raw_data``.

    Args:
        start: Inclusive lower bound for ``ASCEvents.TimeStamp``; inserted
            into the SQL text as a quoted literal.
        end: Exclusive upper bound for ``ASCEvents.TimeStamp``; inserted into
            the SQL text as a quoted literal.
        event_codes: Comma-separated event ids placed inside ``IN(...)``.
        filtered_devices: Optional iterable of device ids. When given, only
            events from those devices are returned.

    NOTE: ``start``, ``end`` and ``event_codes`` are interpolated into the
    SQL text, so they must come from a trusted source.
    """
    if filtered_devices is not None:
        # Devices are loaded into a temp table and filtered with a JOIN
        # instead of a long IN (...) list. int() guarantees only numeric ids
        # reach the SQL text.
        inserts = ''.join(
            f"INSERT INTO #TempDeviceTable (DeviceId) VALUES ({int(device)}); "
            for device in filtered_devices
        )
        sql_script = "CREATE TABLE #TempDeviceTable (DeviceId int); " + inserts
        device_filter = """
        INNER JOIN #TempDeviceTable
            ON ASCEvents.DeviceId = #TempDeviceTable.DeviceId
        """
    else:
        device_filter = ''
        sql_script = 'SET NOCOUNT ON; '

    # Add the main SELECT statement to the script.
    # Only ASCEvents columns are selected: a bare `*` would also return the
    # temp table's DeviceId, producing a duplicate column name.
    sql_script += f"""
        SELECT DISTINCT ASCEvents.*
        FROM ASCEvents
        {device_filter}
        WHERE ASCEvents.TimeStamp >= '{start}'
            AND ASCEvents.TimeStamp < '{end}'
            AND ASCEvents.EventId IN({event_codes});
        """

    if filtered_devices is not None:
        # Drop the temp table at the end of the script
        sql_script += "DROP TABLE #TempDeviceTable;"

    # Load raw data and downsize the dtypes for efficiency
    df = self.query_mssql(sql_script, self.mssql_server, self.mssql_database)
    print('loaded data from SQL Server')
    df = df.astype({'DeviceId': 'uint16', 'EventId': 'uint8', 'Parameter': 'uint8'})
    # Register the data in DuckDB (drop if exists)
    self.duck_con.execute('DROP VIEW IF EXISTS raw_data')
    self.duck_con.register('raw_data', df)
132
+ #print(sql_script)
133
+
134
+
135
+ # Helper function to modify and run DuckDB queries
136
def create_view(self, query_name, view_name, from_table=None, variable1=None, debug=False):
    """Fill in a named SQL template and (re)create it as a temporary DuckDB view.

    Args:
        query_name: Name of the query to run (key in ``self.queries_dict``).
        view_name: Name of the view to create; an existing view with that
            name is dropped first.
        from_table: Replaces every ``@table`` placeholder in the query.
        variable1: Replaces every ``@variable1`` placeholder in the query.
            Non-string values (e.g. numbers) are converted with ``str``.
        debug: When true, print the final query text before running it.

    Raises:
        KeyError: if ``query_name`` is not in ``self.queries_dict``.
    """
    query = self.queries_dict[query_name]
    if from_table is not None:
        query = query.replace('@table', from_table)
    if variable1 is not None:
        # str() lets callers pass numbers directly instead of pre-formatting.
        query = query.replace('@variable1', str(variable1))
    if debug:
        print(query)
    # Create the view (drop if it already exists)
    self.duck_con.execute(f"DROP VIEW IF EXISTS {view_name}")
    self.duck_con.execute(f"CREATE TEMPORARY VIEW {view_name} AS {query}")
153
+
154
+
155
+ # Function to check if data is loaded
156
def check_data(self):
    """Check that ``raw_data`` is loaded in DuckDB and is not empty.

    Raises:
        ValueError: if ``raw_data`` is not listed by ``SHOW TABLES``, or if
            it contains no rows. The same message is printed before raising.
    """
    tables = {row[0] for row in self.duck_con.execute("SHOW TABLES").fetchall()}
    if 'raw_data' not in tables:
        print('Data is not loaded yet!')
        raise ValueError("Data is not loaded yet!")
    # Probe for a single row rather than counting every row just to learn
    # whether the table is empty.
    if self.duck_con.execute("SELECT 1 FROM raw_data LIMIT 1").fetchone() is None:
        print('Data is empty!')
        raise ValueError("Data is empty!")
165
+ #print('Data is loaded and ready to go!')
166
+
167
+
168
+ # Aggregate Split Failures, by approach is default, set to false to do by lane
169
+ # Based on research, about 70% may be good threshold for 20ft long zones with approach based
170
def split_failure(self, by_approach=True, green_occupancy_threshold=0.80, red_occupancy_threshold=0.80):
    """Aggregate split failures and return them as a DataFrame.

    Aggregation is by approach by default; pass ``by_approach=False`` to
    aggregate by lane. Author's note: based on research, about 70% may be a
    good threshold for 20ft long zones when approach based.

    Args:
        by_approach: Combine detectors across a phase when true.
        green_occupancy_threshold: Minimum ``Green_Occupancy`` for a split failure.
        red_occupancy_threshold: Minimum ``Red_Occupancy`` for a split failure.

    Returns:
        DataFrame of ``view8`` plus a boolean ``Split_Failure`` column that
        is true when both occupancies meet their thresholds.
    """
    # Make sure raw_data exists and is non-empty before building anything
    self.check_data()

    # NOTE: TABLE NAMES ARE HARD CODED INTO queries.sql
    # DON'T CHANGE THESE UNLESS YOU DO IT IN BOTH FILES
    self.duck_con.register('configs', self.configs['split_fail_config'])

    # Detector/phase combination differs between approach and lane mode
    if by_approach:
        combine_steps = [
            ('combine_detectors_ByApproach', 'view3a', 'view2', None),
            ('phase_with_detector_ByApproach', 'view3', 'view3a', None),
        ]
    else:
        combine_steps = [
            ('phase_with_detector_ByLane', 'view3', 'view2', None),
        ]

    # (query name, view name, source table, variable1) in execution order.
    # Each step is an unmaterialized view; they get optimized together when
    # the final query runs.
    pipeline = [
        ('detector_with_phase', 'view1', None, None),
        ('impute_actuations', 'view2', 'view1', None),
        *combine_steps,
        ('with_barrier', 'view4', 'view3', '5'),  # add the barrier at 5 seconds
        ('with_cycle', 'view5', 'view4', None),
        ('time_diff', 'view6', 'view5', None),
        ('aggregate', 'view7', 'view6', None),
        ('final_SF', 'view8', 'view7', None),
    ]
    for query_name, view, source, extra in pipeline:
        self.create_view(query_name, view_name=view, from_table=source, variable1=extra)

    # Classify each row using the red/green occupancy thresholds
    classification_sql = f"""
        SELECT *,
            CASE WHEN
                Red_Occupancy>={red_occupancy_threshold}
                AND Green_Occupancy>={green_occupancy_threshold}
            THEN True ELSE False END AS Split_Failure
        FROM view8
        """
    result = self.duck_con.query(classification_sql)
    return result.fetchdf()
206
+
207
+
208
+ # Yellow and Red Actuations
209
def yellow_red(self, bin_size=15, latency_offset=1.5):
    """Compute yellow and red actuations and return them as a DataFrame.

    Args:
        bin_size: Accepted but not used by this method.
        latency_offset: Passed (as text) to the
            ``detector_with_phase_ON_ONLY`` query as ``@variable1``.

    Returns:
        DataFrame with every row of the final view (``view5``).
    """
    # Make sure raw_data exists and is non-empty before building anything
    self.check_data()

    # NOTE: TABLE NAMES ARE HARD CODED INTO queries.sql
    # DON'T CHANGE THESE UNLESS YOU DO IT IN BOTH FILES
    self.duck_con.register('configs', self.configs['yellow_red_config'])

    # (query name, view name, source table, variable1) in execution order.
    # Each step is an unmaterialized view; they get optimized together when
    # the final query runs.
    pipeline = (
        # only detector-on events, shifted by the latency offset
        ('detector_with_phase_ON_ONLY', 'view1', None, str(latency_offset)),
        # phase data and detector data together
        ('phase_with_detector_ByApproach', 'view2', 'view1', None),
        ('with_cycle', 'view3', 'view2', None),
        ('valid_cycles', 'view4', 'view3', None),
        ('red_offset', 'view5', 'view4', None),
    )
    for query_name, view, source, extra in pipeline:
        self.create_view(query_name, view_name=view, from_table=source, variable1=extra)

    result = self.duck_con.query('SELECT * FROM view5')
    return result.fetchdf()
224
+
225
+
226
+ # Arrival on Green
227
def arrival_on_green(self, bin_size=15, latency_offset=0):
    """Compute arrival on green and return it as a DataFrame.

    Args:
        bin_size: Passed (as text) to the ``arrival_on_green`` query as
            ``@variable1``.
        latency_offset: Passed (as text) to the
            ``detector_with_phase_ON_ONLY`` query as ``@variable1``.

    Returns:
        DataFrame with every row of the final view (``view4``).
    """
    # Make sure raw_data exists and is non-empty before building anything
    self.check_data()

    # NOTE: TABLE NAMES ARE HARD CODED INTO queries.sql
    # DON'T CHANGE THESE UNLESS YOU DO IT IN BOTH FILES
    self.duck_con.register('configs', self.configs['arrival_on_green_config'])

    # (query name, view name, source table, variable1) in execution order.
    # Each step is an unmaterialized view; they get optimized together when
    # the final query runs.
    pipeline = (
        # only detector-on events
        ('detector_with_phase_ON_ONLY', 'view1', None, str(latency_offset)),
        # phase data and detector data together
        ('phase_with_detector_ByApproach', 'view2', 'view1', None),
        ('with_cycle', 'view3', 'view2', None),
        ('arrival_on_green', 'view4', 'view3', str(bin_size)),
    )
    for query_name, view, source, extra in pipeline:
        self.create_view(query_name, view_name=view, from_table=source, variable1=extra)

    result = self.duck_con.query('SELECT * FROM view4')
    return result.fetchdf()
241
+
242
+
243
+ # Phase Terminations
244
def phase_termination(self, bin_size=15):
    """Compute phase terminations and return them as a DataFrame.

    Args:
        bin_size: Passed (as text) to the ``phase_termination`` query as
            ``@variable1``.

    Returns:
        DataFrame with every row of ``view1``, built directly on ``raw_data``.
    """
    # Make sure raw_data exists and is non-empty before building anything
    self.check_data()
    bin_text = str(bin_size)
    self.create_view(
        'phase_termination',
        view_name='view1',
        from_table='raw_data',
        variable1=bin_text,
        debug=False,
    )
    result = self.duck_con.query('SELECT * FROM view1')
    return result.fetchdf()
251
+
252
+ # Optional, plot occupancy
253
def plot_occupancy(self, sf, DeviceId, Phase=None, Detector=None):
    """Optional: plot green/red occupancy and split failures for one device.

    Args:
        sf: DataFrame with ``DeviceId``, ``TimeStamp``, ``Green_Occupancy``,
            ``Red_Occupancy`` and boolean ``Split_Failure`` columns, plus
            ``Phase`` and/or ``Detector`` when filtering on them.
            ``TimeStamp`` must be datetime-like because it is resampled.
        DeviceId: Device to plot.
        Phase: Optional phase filter.
        Detector: Optional detector filter.

    Raises:
        ValueError: if no rows match the requested device/phase/detector.
    """
    # Imported here so matplotlib is only needed when plotting is used
    import matplotlib.pyplot as plt
    import matplotlib.dates as mdates
    from matplotlib.ticker import FuncFormatter
    from matplotlib.lines import Line2D

    # Build the row filter from the criteria that were actually supplied.
    # A column is only touched when its filter is given, so frames without a
    # Detector (or Phase) column still work.
    mask = sf['DeviceId'] == DeviceId
    name_parts = []
    if Detector is not None:
        mask = mask & (sf['Detector'] == Detector)
        name_parts.append(f"Detector {Detector}")
    if Phase is not None:
        mask = mask & (sf['Phase'] == Phase)
        name_parts.append(f"Phase {Phase}")
    name = ', '.join(name_parts) if name_parts else 'all phases and detectors'

    # Non-mutating set_index: avoids modifying a slice of the caller's frame
    sf_filtered = sf[mask].sort_values('TimeStamp').set_index('TimeStamp')
    if sf_filtered.empty:
        raise ValueError(f'No rows found for DeviceId {DeviceId}, {name}')

    plt.figure(figsize=(10, 5))

    # Average occupancy in 15-minute intervals, missing intervals as zero.
    # '15min' is used because the 'T' alias is deprecated in recent pandas.
    average_green = sf_filtered['Green_Occupancy'].resample('15min').mean().fillna(0)
    average_red = sf_filtered['Red_Occupancy'].resample('15min').mean().fillna(0)

    # Plot the average occupancy as a stepped line
    plt.step(average_green.index, average_green, where='post', color='green', linestyle='-', label='Average Green Occupancy')
    plt.step(average_red.index, average_red, where='post', color='red', linestyle='-', label='Average Red Occupancy')

    # Thin vertical line at every timestamp flagged as a split failure
    for timestamp in sf_filtered[sf_filtered['Split_Failure']].index:
        plt.axvline(x=timestamp, color='yellow', linewidth=0.5)

    # Individual observations
    plt.scatter(sf_filtered.index, sf_filtered['Green_Occupancy'], color='green', s=3, label='Green Occupancy')
    plt.scatter(sf_filtered.index, sf_filtered['Red_Occupancy'], color='red', s=3, label='Red Occupancy')

    plt.xlabel('Timestamp')
    plt.ylabel('Occupancy')

    ax = plt.gca()
    # Display y-axis values as percentages
    ax.yaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.0%}'.format(y)))
    # Hourly ticks formatted as HH:MM for a cleaner x-axis
    ax.xaxis.set_major_locator(mdates.HourLocator(interval=1))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))

    plt.grid(True)

    # Remove whitespace by setting limits
    plt.xlim(sf_filtered.index.min(), sf_filtered.index.max())
    plt.ylim(0, 1)

    plt.xticks(rotation=45)

    # axvline has no label, so add a custom legend entry for split failures
    custom_line = Line2D([0], [0], color='yellow', lw=2, label='Split Failure (vertical line)')
    handles, _ = ax.get_legend_handles_labels()
    handles.append(custom_line)
    plt.legend(handles=handles, loc='lower left', framealpha=1)

    plt.title(f'Split Failures for DeviceId {DeviceId}, {name}')
    plt.tight_layout()
    plt.show()
323
+
324
+
325
class TimelineEvents:
    """Placeholder class; it currently defines no attributes or methods."""
atspm/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ from .sample_data import sample_data
2
+ from .Aggregations import Aggregations
atspm/sample_data.py ADDED
@@ -0,0 +1,14 @@
1
+ # sample_data.py inside the atspm package
2
+ import pandas as pd
3
+ import os
4
+
5
+ # Assuming this file is in the same directory as the `data` directory
6
# The bundled `data` directory sits next to this file
data_dir = os.path.join(os.path.dirname(__file__), 'data')


class SampleData:
    """Sample detector configuration and hi-res data read from `data_dir`."""

    def __init__(self):
        config_path = os.path.join(data_dir, 'sample_detector-config.parquet')
        events_path = os.path.join(data_dir, 'sample_hi-res_data.parquet')
        # Both parquet files are read as soon as the object is created
        self.config = pd.read_parquet(config_path)
        self.data = pd.read_parquet(events_path)


# Module-level instance, created when this module is imported
sample_data = SampleData()
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Shawn Strasser
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,52 @@
1
+ Metadata-Version: 2.1
2
+ Name: atspm
3
+ Version: 1.0.0
4
+ Summary: Aggregates hi-res data from ATC traffic signal controllers into 15-minute binned ATSPM/performance measures.
5
+ Author-email: Shawn Strasser <shawn.strasser@odot.oregon.gov>
6
+ Classifier: Programming Language :: Python :: 3
7
+ Classifier: License :: OSI Approved :: MIT License
8
+ Classifier: Operating System :: OS Independent
9
+ Requires-Python: >=3.7
10
+ Description-Content-Type: text/markdown
11
+ License-File: LICENSE
12
+
13
+ # ATSPM Aggregation
14
+
15
+ This repository contains WORK-IN-PROGRESS code for efficient and scalable aggregation of Automated Traffic Signal Performance Measures (ATSPMs). It includes split failures, yellow/red actuations, and percent arrival on green. More examples and documentation will be added soon. This probably isn't ready for production use yet, so use it with caution; it will be more developed and tested by the end of July 2023.
16
+
17
+ ## Quick Usage Overview for Split Failures
18
+
19
+ ```python
20
+ # Import libraries
21
+ import pandas as pd
22
+ from atspm import Aggregations
23
+
24
+ # Load hi-res and detector-phase configuration data
25
+ raw_data = pd.read_parquet('sample_hi-res_data.parquet')
26
+ config = pd.read_parquet('sample_detector-config.parquet')
27
+
28
+ # Instantiate Aggregations class and load hi-res data and detector configurations into it
29
+ aggr = Aggregations(data=raw_data, phase_detector_config=config)
30
+
31
+ # Return aggregate split failures
32
+ sf = aggr.split_failure()
33
+
34
+ # Plot to inspect results (optional)
35
+ aggr.plot_occupancy(sf, DeviceId=240, Phase=1)
36
+ ```
37
+ ![Alt text](example-SF-chart.png)
38
+
39
+ ## Try it on Colab!
40
+ If you'd like to try out the code without installing anything, here is a Google Colab notebook:
41
+ https://colab.research.google.com/drive/1xHs7H4SKRiIoX3GtUxDG0qiWS4zdmYsr
42
+
43
+ ## Background
44
+
45
+ ATSPMs can be computationally expensive, making it difficult to scale simultaneously across all traffic signals at an agency. Initially, for-loops were used to produce aggregations for a single detector at a time, taking several seconds each, and this proved to be far too slow to scale to the entire signal system. That code is still available in the master branch. This branch focuses on vectorizing the code so that operations run on every detector at every signal at the same time. The operations are expressed as SQL queries and executed with DuckDB, an open-source library that is fast and utilizes all CPU cores.
46
+
47
+
48
+
49
+ <br>
50
+ Stay tuned for more!
51
+
52
+
@@ -0,0 +1,8 @@
1
+ atspm/Aggregations.py,sha256=fcEDnamzdmwYwLCTiRR4_7pt-fNDOiR9UAkvhTNajas,16337
2
+ atspm/__init__.py,sha256=wEUHKOMbciMMHFOUn3y50AhcJjW9CqZwUx9R7LyMCEQ,78
3
+ atspm/sample_data.py,sha256=wquzOo-OcAe72OBabVQuJWHKgzaxfLEfTayMDLznlLU,505
4
+ atspm-1.0.0.dist-info/LICENSE,sha256=0K7oeO72fUbqlSWTLP4XvLsHo0gP6Z8CCpgUcREYOKg,1092
5
+ atspm-1.0.0.dist-info/METADATA,sha256=NL_XSEjdqkykzHgX59KxY7T62mWp2CGp38yEdQGNsvo,2452
6
+ atspm-1.0.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
7
+ atspm-1.0.0.dist-info/top_level.txt,sha256=KAAyAQpmMhRsMSAe_r7yRy8gt8Kvkf8vJG-CPnxghis,6
8
+ atspm-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: bdist_wheel (0.43.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ atspm