PyPI - atspm - Versions diffs - 1.0.0__py3-none-any.whl - Mend

atspm 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

atspm/Aggregations.py +326 -0
atspm/__init__.py +2 -0
atspm/sample_data.py +14 -0
atspm-1.0.0.dist-info/LICENSE +21 -0
atspm-1.0.0.dist-info/METADATA +52 -0
atspm-1.0.0.dist-info/RECORD +8 -0
atspm-1.0.0.dist-info/WHEEL +5 -0
atspm-1.0.0.dist-info/top_level.txt +1 -0

atspm/Aggregations.py ADDED Viewed

@@ -0,0 +1,326 @@
+import pandas as pd
+import duckdb
+import os
+#import importlib.resources as pkg_resources
+#with pkg_resources.open_text('ATSPM_Aggregation', 'queries.sql') as f:
+#    sql_query = f.read()
+class Utils:
+    '''Helper functions to be shared across classes'''
+    # Run queries in MS SQL Server
+    def query_mssql(self, query, server, database):
+        from sqlalchemy import create_engine
+        import warnings
+        query = "SET NOCOUNT ON; " + query
+        connection_string = f"mssql+pyodbc://@{server}/{database}?trusted_connection=yes&driver=SQL+Server"
+        engine = create_engine(connection_string)
+        conn = engine.raw_connection() # Uses DBAPI
+        # Supress warning from Pandas where it says it's only tested on sqlalchemy
+        # This method is MUCH faster, so I'll stick with it
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            df = pd.read_sql_query(query, conn)
+        conn.close()
+        engine.dispose()
+        return df
+# Additional libraries are imported inside of optional functions: query_mssql & plot_occupancy
+class Aggregations(Utils):
+    def __init__(self, phase_detector_config, data=None, mssql_server=None, mssql_database=None, duckdb_threads=None):
+        # Connect to DuckDB and register table
+        self.duck_con = duckdb.connect(database=':memory:', read_only=False)
+        # Hi-res Event Codes to Include When Loading Data FOR DETECTOR-BASED ATSPMs
+        self.event_codes = '1,8,10,81,82'
+        # Hi-res Event Codes to Include When Loading Data FOR GENERIC ATSPMs
+        self.event_codes_generic = '4,5,6'
+        # Load data if provided, ensuring proper format
+        try:
+            if data is not None:
+                # Set data types
+                data = data.astype({'DeviceId':'uint16', 'EventId':'uint8', 'Parameter':'uint8'})
+                data = duckdb.query(f'SELECT DISTINCT * FROM data WHERE EventId IN({self.event_codes},{self.event_codes_generic})').fetchdf()
+                self.duck_con.register('raw_data', data)
+        except Exception as e:
+            print(e)
+            print('Data must be a pandas dataframe with columns: DeviceId, EventId, Parameter, Timestamp')
+        # Option to limit CPU use if needed
+        if isinstance(duckdb_threads, int):
+            duckdb.query(f"SET threads to {duckdb_threads}")
+            #print(duckdb.query(f"SELECT current_setting('threads');"))
+        # Define phase-detector configurations dictionary
+        # First entry is configurations dataframe, second is devices
+        self.configs = dict()
+        def declare_config(measure_detection):
+            measure, detection = measure_detection
+            try:
+                self.configs[f'{measure}_config'] = phase_detector_config[phase_detector_config.Function == detection][['Phase', 'Parameter', 'DeviceId']]
+                self.configs[f'{measure}_devices'] = set(self.configs[f'{measure}_config'].DeviceId)
+                assert len(self.configs[f'{measure}_devices']) > 0
+            except Exception as e:
+                print(f'{measure} Detection Not Found!')
+                print(e)
+        for item in [('split_fail', 'Presence'), ('yellow_red', 'Yellow_Red'), ('arrival_on_green', 'Advance')]:
+            declare_config(item)
+        self.mssql_server = mssql_server
+        self.mssql_database = mssql_database
+        # Get the absolute path of the current file
+        current_file_path = os.path.abspath(__file__)
+        # Construct the absolute path to the queries.sql file
+        queries_file_path = os.path.join(os.path.dirname(current_file_path), 'queries.sql')
+        # Load SQL Queries Into Dicitonary
+        with open(queries_file_path, 'r') as file:
+            content = file.read()
+        queries = content.split(';')  # Splits queries by ';' which ends a SQL command
+        self.queries_dict = {}
+        for query in queries:
+            if query.strip() != '':  # Ignore empty lines
+                lines = query.strip().split('\n')  # Split lines
+                name = lines[0].strip('- ').strip()  # Extract query name from the first line
+                sql_query = '\n'.join(lines[1:]).strip()  # Join the remaining lines to form the query
+                self.queries_dict[name] = sql_query
+    # Get raw event data from SQL Server
+    def get_mssql_data(self, start, end, event_codes, filtered_devices=None):
+        if filtered_devices is not None:
+            # Start constructing a long SQL script
+            sql_script = "CREATE TABLE #TempDeviceTable (DeviceId int); "
+            # Add an INSERT statement to the script for each device
+            for device in filtered_devices:
+                sql_script += f"INSERT INTO #TempDeviceTable (DeviceId) VALUES ({device}); "
+            # Modify the device filter to use a JOIN instead of IN
+            device_filter = """
+            INNER JOIN #TempDeviceTable
+            ON ASCEvents.DeviceId = #TempDeviceTable.DeviceId
+            """
+        else:
+            device_filter = ''
+            sql_script = 'SET NOCOUNT ON; '
+        # Add the main SELECT statement to the script
+        sql_script += f"""
+        SELECT DISTINCT *
+        FROM ASCEvents
+        {device_filter}
+        WHERE ASCEvents.TimeStamp >= '{start}'
+        AND ASCEvents.TimeStamp < '{end}'
+        AND EventId IN({event_codes});
+        """
+        if filtered_devices is not None:
+            # Add a statement to drop the temp table to the script
+            sql_script += "DROP TABLE #TempDeviceTable;"
+        #print('\n'*3,sql_script,'\n'*3)
+        #print('Loading data from SQL Server for quer \n', sql_script, '\n')
+        # Load raw data and downsize the dtypes for efficiency
+        df = self.query_mssql(sql_script, self.mssql_server, self.mssql_database)
+        print('loaded data from SQL Server')
+        df = df.astype({'DeviceId':'uint16', 'EventId':'uint8', 'Parameter':'uint8'})
+        # Register the data in DuckDB (drop if exists)
+        self.duck_con.execute('DROP VIEW IF EXISTS raw_data')
+        self.duck_con.register('raw_data', df)
+        #print(sql_script)
+    # Helper function to modify and run DuckDB queries
+    def create_view(self, query_name, view_name, from_table=None, variable1=None, debug=False):
+        '''
+        query_name: name of query to run
+        view_name: name of view to create
+        from_table: table to use in query
+        variable1: variable to use in query'''
+        query = self.queries_dict[query_name]
+        if from_table is not None:
+            query = query.replace('@table', from_table)
+        if variable1 is not None:
+            query = query.replace('@variable1', variable1)
+        if debug:
+            print(query)
+        # Create the view (drop if it already exists)
+        self.duck_con.execute(f"DROP VIEW IF EXISTS {view_name}")
+        self.duck_con.execute(f"CREATE TEMPORARY VIEW {view_name} AS {query}")
+    # Function to check if data is loaded
+    def check_data(self):
+        tables = [x[0] for x in self.duck_con.execute("SHOW TABLES").fetchall()]
+        if 'raw_data' not in tables:
+            print('Data is not loaded yet!')
+            raise ValueError("Data is not loaded yet!")
+        # Check if data table is empty
+        if self.duck_con.execute("SELECT COUNT(*) FROM raw_data LIMIT 1").fetchall()[0][0] == 0:
+            print('Data is empty!')
+            raise ValueError("Data is empty!")
+        #print('Data is loaded and ready to go!')
+    # Aggregate Split Failures, by approach is default, set to false to do by lane
+    # Based on research, about 70% may be good threshold for 20ft long zones with approach based
+    def split_failure(self, by_approach=True, green_occupancy_threshold=0.80, red_occupancy_threshold=0.80):
+        # Check if data table exists in DuckDB
+        self.check_data()
+        # Now transform data into split failures
+        # NOTE: TABLE NAMES ARE HARD CODED INTO queries.sql
+        # DON'T CHANGE THESE UNLESS YOU DO IT IN BOTH FILES
+        # Register configs in DuckDB
+        self.duck_con.register('configs', self.configs['split_fail_config'])
+        # Run SQL Queries to transform data
+        # Each step is an immaterialized view that will be optimized together at the end
+        self.create_view('detector_with_phase', view_name='view1')
+        self.create_view('impute_actuations',view_name='view2', from_table='view1')
+        # by_approach combines detectors accross phase
+        if by_approach:
+            self.create_view('combine_detectors_ByApproach', view_name='view3a', from_table='view2')
+            self.create_view('phase_with_detector_ByApproach', view_name='view3', from_table='view3a')
+        else:
+            self.create_view('phase_with_detector_ByLane', view_name='view3', from_table='view2')
+        # Remaining queries are same for by approach or by lane
+        self.create_view('with_barrier', view_name='view4', from_table='view3', variable1='5')#add the barrier at 5 seconds
+        self.create_view('with_cycle', view_name='view5', from_table='view4')
+        self.create_view('time_diff', view_name='view6', from_table='view5')
+        self.create_view('aggregate', view_name='view7', from_table='view6')
+        self.create_view('final_SF', view_name='view8', from_table='view7')
+        # Apply red/green occupancy thresholds for classification
+        query = f"""
+            SELECT *,
+            CASE WHEN
+                Red_Occupancy>={red_occupancy_threshold}
+                AND Green_Occupancy>={green_occupancy_threshold}
+                THEN True ELSE False END AS Split_Failure
+            FROM view8
+        """
+        return self.duck_con.query(query).fetchdf()
+    # Yellow and Red Actuations
+    def yellow_red(self, bin_size=15, latency_offset=1.5):
+        # Check if data table exists in DuckDB
+        self.check_data()
+        # NOTE: TABLE NAMES ARE HARD CODED INTO queries.sql
+        # DON'T CHANGE THESE UNLESS YOU DO IT IN BOTH FILES
+        # Register configs in DuckDB
+        self.duck_con.register('configs', self.configs['yellow_red_config'])
+        # Run SQL Queries to transform data
+        # Each step is an immaterialized view that will be optimized together at the end
+        self.create_view('detector_with_phase_ON_ONLY', view_name='view1', variable1=str(latency_offset)) #only contains detector on events, shifted by 1.5 seconds for latency
+        self.create_view('phase_with_detector_ByApproach', view_name='view2', from_table='view1') #contains phase data and detector data together
+        self.create_view('with_cycle', view_name='view3', from_table='view2')
+        self.create_view('valid_cycles', view_name='view4', from_table='view3')
+        self.create_view('red_offset', view_name='view5', from_table='view4')
+        return self.duck_con.query('SELECT * FROM view5').fetchdf()
+    # Arrival on Green
+    def arrival_on_green(self, bin_size=15, latency_offset=0):
+        # Check if data table exists in DuckDB
+        self.check_data()
+        # NOTE: TABLE NAMES ARE HARD CODED INTO queries.sql
+        # DON'T CHANGE THESE UNLESS YOU DO IT IN BOTH FILES
+        # Register configs in DuckDB
+        self.duck_con.register('configs', self.configs['arrival_on_green_config'])
+        # Run SQL Queries to transform data
+        # Each step is an immaterialized view that will be optimized together at the end
+        self.create_view('detector_with_phase_ON_ONLY', view_name='view1', variable1=str(latency_offset)) #only contains detector on events. latency offset=0?
+        self.create_view('phase_with_detector_ByApproach', view_name='view2', from_table='view1') #contains phase data and detector data together
+        self.create_view('with_cycle', view_name='view3', from_table='view2')
+        self.create_view('arrival_on_green', view_name='view4', from_table='view3', variable1=str(bin_size))
+        return self.duck_con.query('SELECT * FROM view4').fetchdf()
+    # Phase Terminations
+    def phase_termination(self, bin_size=15):
+        # Check if data table exists in DuckDB
+        self.check_data()
+        #print('working on phase termination (inside Aggregations.py)')
+        self.create_view('phase_termination', view_name='view1', from_table='raw_data', variable1=str(bin_size), debug=False)
+        #print('view created')
+        return self.duck_con.query('SELECT * FROM view1').fetchdf()
+    # Optional, plot occupancy
+    def plot_occupancy(self, sf, DeviceId, Phase=None, Detector=None):
+        import matplotlib.pyplot as plt
+        import matplotlib.dates as mdates
+        from matplotlib.ticker import FuncFormatter
+        from matplotlib.lines import Line2D
+        # Filter DataFrame to include only rows with the given DeviceId and Detector
+        if Phase is None and Detector is not None:
+            sf_filtered = sf[(sf['DeviceId'] == DeviceId) & (sf['Detector'] == Detector)].sort_values('TimeStamp')
+            name = f"Detector {Detector}"
+        elif Detector is None and Phase is not None:
+            sf_filtered = sf[(sf['DeviceId'] == DeviceId) & (sf['Phase'] == Phase)].sort_values('TimeStamp')
+            name = f"Phase {Phase}"
+        else:
+            sf_filtered = sf[(sf['DeviceId'] == DeviceId) & (sf['Phase'] == Phase) & (sf['Detector'] == Detector)].sort_values('TimeStamp')
+            name = f"Detector {Detector}, Phase {Phase}"
+        sf_filtered.set_index('TimeStamp', inplace=True)
+        # Create a scatter plot for Green_Occupancy and Red_Occupancy
+        plt.figure(figsize=(10,5))
+        # Calculate average occupancy in 15-minute intervals and fill missing data with zero
+        average_green = sf_filtered['Green_Occupancy'].resample('15T').mean().fillna(0)
+        average_red = sf_filtered['Red_Occupancy'].resample('15T').mean().fillna(0)
+        # Plot the average occupancy as a stepped line
+        plt.step(average_green.index, average_green, where='post', color='green', linestyle='-', label='Average Green Occupancy')
+        plt.step(average_red.index, average_red, where='post', color='red', linestyle='-', label='Average Red Occupancy')
+        # Find all timestamps where both Green_Occupancy and Red_Occupancy are above 0.79
+        timestamps = sf_filtered[sf_filtered['Split_Failure']].index
+        # Add a vertical line for each of those timestamps with a thinner line
+        for timestamp in timestamps:
+            plt.axvline(x=timestamp, color='yellow', linewidth=0.5)
+        plt.scatter(sf_filtered.index, sf_filtered['Green_Occupancy'], color='green', s=3, label='Green Occupancy')
+        plt.scatter(sf_filtered.index, sf_filtered['Red_Occupancy'], color='red', s=3, label='Red Occupancy')
+        plt.xlabel('Timestamp')
+        plt.ylabel('Occupancy')
+        # Use FuncFormatter to display y-axis values as percentages
+        plt.gca().yaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.0%}'.format(y)))
+        # Use DateFormatter and HourLocator for a cleaner x-axis
+        ax = plt.gca()
+        ax.xaxis.set_major_locator(mdates.HourLocator(interval=1))
+        ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
+        # Add gridlines
+        plt.grid(True)
+        # Remove whitespace by setting limits
+        plt.xlim(sf_filtered.index.min(), sf_filtered.index.max())
+        plt.ylim(0, 1)
+        # Rotate x-axis labels
+        plt.xticks(rotation=45)
+        # Create a custom legend entry
+        custom_line = Line2D([0], [0], color='yellow', lw=2, label='Split Failure (vertical line)')
+        # When calling legend(), append the custom entry to the list of handles
+        handles, labels = plt.gca().get_legend_handles_labels()
+        handles.append(custom_line)
+        plt.legend(handles=handles, loc='lower left', framealpha=1)
+        plt.title(f'Split Failures for DeviceId {DeviceId}, {name}')
+        plt.tight_layout()
+        plt.show()
+        class TimelineEvents:
+            pass

atspm/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ from .sample_data import sample_data
2	+ from .Aggregations import Aggregations

atspm/sample_data.py ADDED Viewed

@@ -0,0 +1,14 @@
+# sample_data.py inside the atspm package
+import pandas as pd
+import os
+# Assuming this file is in the same directory as the `data` directory
+data_dir = os.path.join(os.path.dirname(__file__), 'data')
+class SampleData:
+    def __init__(self):
+        self.config = pd.read_parquet(os.path.join(data_dir, 'sample_detector-config.parquet'))
+        self.data = pd.read_parquet(os.path.join(data_dir, 'sample_hi-res_data.parquet'))
+# Create an instance of the class
+sample_data = SampleData()

atspm-1.0.0.dist-info/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2024 Shawn Strasser
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

atspm-1.0.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,52 @@
+Metadata-Version: 2.1
+Name: atspm
+Version: 1.0.0
+Summary: Aggregates hi-res data from ATC traffic signal controllers into 15-minute binned ATSPM/performance measures.
+Author-email: Shawn Strasser <shawn.strasser@odot.oregon.gov>
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.7
+Description-Content-Type: text/markdown
+License-File: LICENSE
+# ATSPM Aggregation
+This repository contains WORK-IN-PROGRESS code for efficient and scalable aggregation of Automated Traffic Signal Performance Measures (ATSPMs). It includes split failures, yellow/red actuations, and percent arrival on green. I'll add more examples and documentation soon. This probably isn't ready for production use yet, so use it with caution; it will be more developed/tested by end of July 2023.
+## Quick Usage Overview for Split Failures
+```python
+# Import libraries
+import pandas as pd
+from atspm import Aggregations
+# Load hi-res and detector-phase configuration data
+raw_data = pd.read_parquet('sample_hi-res_data.parquet')
+config = pd.read_parquet('sample_detector-config.parquet')
+# Instantiate Aggregations class and load hi-res data and detector configurations into it
+aggr = Aggregations(data=raw_data, phase_detector_config=config)
+# Return aggregate split failures
+sf = aggr.split_failure()
+# Plot to inspect results (optional)
+aggr.plot_occupancy(sf, DeviceId=240, Phase=1)
+```
+![Alt text](example-SF-chart.png)
+## Try it on Colab!
+If you'd like to try out the code without installing anything, here is a Google Colab notebook:
+https://colab.research.google.com/drive/1xHs7H4SKRiIoX3GtUxDG0qiWS4zdmYsr
+## Background
+ATSPMs can be computationally expensive, making it difficult to scale simultaneously across all traffic signals at an agency. Initially, for-loops were used to produce aggregations for a single detector at a time, taking several seconds each, and this proved to be far too slow to scale to the entire signal system. That code is still available in the master branch. This branch focuses on vectorizing the code so that operations run on every detector at every signal at the same time. The operations are done using SQL queries executed by DuckDB, an open-source library that is fast and utilizes all CPU cores.
+<br>
+Stay tuned for more!

atspm-1.0.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,8 @@
+atspm/Aggregations.py,sha256=fcEDnamzdmwYwLCTiRR4_7pt-fNDOiR9UAkvhTNajas,16337
+atspm/__init__.py,sha256=wEUHKOMbciMMHFOUn3y50AhcJjW9CqZwUx9R7LyMCEQ,78
+atspm/sample_data.py,sha256=wquzOo-OcAe72OBabVQuJWHKgzaxfLEfTayMDLznlLU,505
+atspm-1.0.0.dist-info/LICENSE,sha256=0K7oeO72fUbqlSWTLP4XvLsHo0gP6Z8CCpgUcREYOKg,1092
+atspm-1.0.0.dist-info/METADATA,sha256=NL_XSEjdqkykzHgX59KxY7T62mWp2CGp38yEdQGNsvo,2452
+atspm-1.0.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+atspm-1.0.0.dist-info/top_level.txt,sha256=KAAyAQpmMhRsMSAe_r7yRy8gt8Kvkf8vJG-CPnxghis,6
+atspm-1.0.0.dist-info/RECORD,,

atspm-1.0.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: bdist_wheel (0.43.0)
+Root-Is-Purelib: true
+Tag: py3-none-any

atspm-1.0.0.dist-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ atspm