PyPI - rowingdata - Versions diffs - 3.6.8__py3-none-any.whl - Mend

rowingdata 3.6.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49) hide show

rowingdata/__init__.py +2 -0
rowingdata/__main__.py +2 -0
rowingdata/boatedit.py +15 -0
rowingdata/checkdatafiles.py +216 -0
rowingdata/copystats.py +22 -0
rowingdata/crewnerdplot.py +37 -0
rowingdata/crewnerdplottime.py +37 -0
rowingdata/csvparsers.py +3114 -0
rowingdata/ergdataplot.py +31 -0
rowingdata/ergdataplottime.py +31 -0
rowingdata/ergdatatotcx.py +32 -0
rowingdata/ergstickplot.py +31 -0
rowingdata/ergstickplottime.py +32 -0
rowingdata/ergsticktotcx.py +32 -0
rowingdata/example.csv +5171 -0
rowingdata/gpxtools.py +70 -0
rowingdata/gpxwrite.py +151 -0
rowingdata/konkatenaadje.py +19 -0
rowingdata/laptesting.py +293 -0
rowingdata/obsolete.py +654 -0
rowingdata/otherparsers.py +718 -0
rowingdata/painsled_desktop_plot.py +30 -0
rowingdata/painsled_desktop_plottime.py +29 -0
rowingdata/painsled_desktop_toc2.py +30 -0
rowingdata/painsledplot.py +27 -0
rowingdata/painsledplottime.py +27 -0
rowingdata/painsledtoc2.py +23 -0
rowingdata/roweredit.py +15 -0
rowingdata/rowingdata.py +6941 -0
rowingdata/rowproplot.py +31 -0
rowingdata/rowproplottime.py +31 -0
rowingdata/speedcoachplot.py +31 -0
rowingdata/speedcoachplottime.py +31 -0
rowingdata/speedcoachtoc2.py +36 -0
rowingdata/tcxplot.py +38 -0
rowingdata/tcxplot_nogeo.py +38 -0
rowingdata/tcxplottime.py +33 -0
rowingdata/tcxplottime_nogeo.py +33 -0
rowingdata/tcxtoc2.py +30 -0
rowingdata/tcxtools.py +417 -0
rowingdata/trainingparser.py +302 -0
rowingdata/utils.py +135 -0
rowingdata/windcorrected.py +48 -0
rowingdata/writetcx.py +312 -0
rowingdata-3.6.8.dist-info/LICENSE +21 -0
rowingdata-3.6.8.dist-info/METADATA +1149 -0
rowingdata-3.6.8.dist-info/RECORD +49 -0
rowingdata-3.6.8.dist-info/WHEEL +5 -0
rowingdata-3.6.8.dist-info/top_level.txt +1 -0

rowingdata/otherparsers.py ADDED Viewed

@@ -0,0 +1,718 @@
+# pylint: disable=C0103, C0303
+from __future__ import absolute_import
+import numpy as np
+import pandas as pd
+from pandas import DataFrame
+from lxml import objectify
+from fitparse import FitFile
+try:
+    from . import tcxtools,gpxtools
+    from .utils import totimestamp, geo_distance
+except (ValueError,ImportError): # pragma: no cover
+    import tcxtools,gpxtools
+    from utils import totimestamp, geo_distance
+import sys
+if sys.version_info[0]<=2: # pragma: no cover
+    pythonversion = 2
+else:
+    pythonversion = 3
+import gzip
+import arrow
+import shutil
+from datetime import datetime
+from six.moves import range
+import json
+NAMESPACE = 'http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2'
def strip_non_ascii(string):
    """Return ``string`` keeping only characters with code points 1..126."""
    kept = []
    for char in string:
        # NUL (0), DEL (127) and everything above the ASCII range are dropped
        if 0 < ord(char) < 127:
            kept.append(char)
    return ''.join(kept)
def tofloat(x):
    """Convert ``x`` to a float, returning NaN when that is not possible.

    Both unparsable strings (``ValueError``) and values that are not
    number-like at all, such as ``None`` (``TypeError``), map to ``np.nan``
    so the function can be applied to a whole column without raising.
    """
    try:
        return float(x)
    except (TypeError, ValueError): # pragma: no cover
        return np.nan
class ExcelTemplate(object):
    """Expand an Excel workout template into a stroke-by-stroke data frame.

    The workbook is opened with pandas and its sheet named 'workout' is
    parsed into ``self.xls_df``.  For every row the columns 'Interval Time',
    'SPM', 'Interval Distance', 'Avg HR', 'Rest Time' and 'Rest Distance'
    are read.  Each work interval and each rest is expanded into evenly
    spaced samples (one per stroke at the row's stroke rate) which are
    collected in ``self.df``.
    """

    # stroke rate used when a row does not provide a usable one
    DEFAULT_SPM = 10.

    def __init__(self,readfile): # pragma: no cover
        """Read ``readfile`` and build ``self.df``.

        Args:
            readfile: path or file object accepted by ``pandas.ExcelFile``.
        """
        self.readfile = readfile
        xls_f = pd.ExcelFile(self.readfile)
        self.xls_df = xls_f.parse('workout')

        unixnow = arrow.get(datetime.utcnow()).timestamp
        # ``timestamp`` is a property in arrow < 1.0 and a method in
        # arrow >= 1.0; accept both so the epoch offset is not lost.
        if callable(unixnow):
            unixnow = unixnow()

        frames = []
        time = 0
        totdistance = 0
        # a rest in the first row needs a stroke rate before any work
        # interval has defined one
        spm = self.DEFAULT_SPM

        for nr, row in self.xls_df.iterrows():
            seconds = self._clock_seconds(row['Interval Time'])
            if seconds:
                spm = row['SPM']
                # NaN never compares equal to anything, so it has to be
                # detected with isnull(); a non-positive rate is unusable too
                if pd.isnull(spm) or spm <= 0:
                    spm = self.DEFAULT_SPM
                deltat = 60./spm
                aantal = max(2, int(seconds/deltat)+1)
                distance = row['Interval Distance']
                deltad = distance/float(aantal-1)
                velo = distance/float(seconds)
                frames.append(pd.DataFrame({
                    'time': time+np.arange(aantal)*deltat,
                    'distance': np.arange(aantal)*deltad,
                    'hr': row['Avg HR'],
                    'spm': spm,
                    'pace': 500./velo if velo else 0,
                    'velo': velo,
                    'type': 4,
                    ' lapIdx': nr
                }))
                time += seconds
                # NOTE(review): interval distances restart at zero and this
                # assignment replaces (rather than accumulates) the running
                # total; kept as in the original - confirm it is intended.
                totdistance = distance

            # a missing rest time has no clock attributes and yields 0
            restseconds = self._clock_seconds(row['Rest Time'])
            if restseconds:
                restdistance = row['Rest Distance']
                deltat = 60./spm
                aantal = int(restseconds/deltat)
                try:
                    deltad = restdistance/float(aantal)
                except ZeroDivisionError:
                    deltad = 0
                try:
                    velo = restdistance/restseconds
                    pace = 500./velo
                except ZeroDivisionError:
                    velo = 0
                    pace = 0
                frames.append(pd.DataFrame({
                    'time': time+np.arange(aantal)*deltat,
                    'distance': totdistance+np.arange(aantal)*deltad,
                    'pace': pace,
                    'velo': velo,
                    ' lapIdx': nr,
                    'type': 3,
                }))
                time += restseconds
                totdistance += restdistance

        if frames:
            # DataFrame.append was removed in pandas 2.0; concat is the
            # equivalent and avoids re-copying the frame on every row
            self.df = pd.concat(frames)
        else:
            self.df = pd.DataFrame(columns=[
                'time', 'distance', 'hr', 'spm', 'pace', 'velo',
                'type', ' lapIdx',
            ])

        try:
            self.df['TimeStamp (sec)'] = unixnow+self.df['time']
        except TypeError:
            self.df['TimeStamp (sec)'] = self.df['time']
        self.df['power'] = 2.8*self.df['velo']**3

        mapping = {
            'time': ' ElapsedTime (sec)',
            'distance': ' Horizontal (meters)',
            'hr': ' HRCur (bpm)',
            'spm': ' Cadence (stokes/min)',
            'power': ' Power (watts)',
            }
        self.df.rename(columns = mapping, inplace=True)

    @staticmethod
    def _clock_seconds(value):
        """Return minutes, seconds and microseconds of ``value`` as seconds.

        Values without those attributes (for instance NaN for an empty
        cell) give 0.  The hour component is not taken into account.
        """
        try:
            return 60.*value.minute+value.second+value.microsecond/1.e6
        except AttributeError:
            return 0

    def write_csv(self, *args, **kwargs): # pragma: no cover
        """Write ``self.df`` as CSV.

        Args:
            *args: the first positional argument is the output file name.
            **kwargs: ``gzip=True`` writes a gzip-compressed file to the
                file name with '.gz' appended.

        Returns:
            Whatever ``DataFrame.to_csv`` returns.
        """
        isgzip = kwargs.pop('gzip', False)
        writeFile = args[0]
        data = self.df
        if isgzip:
            return data.to_csv(writeFile + '.gz', index_label='index',
                               compression='gzip')
        else:
            return data.to_csv(writeFile, index_label='index')
def fitsummarydata(*args, **kwargs): # pragma: no cover
    """Old lower-case name for ``FitSummaryData``.

    Emits a warning about the rename and forwards all arguments to
    ``FitSummaryData``, returning the new instance.
    """
    import warnings
    message = "fitsummarydata was renamed to FitSummaryData"
    warnings.warn(message)
    summary = FitSummaryData(*args, **kwargs)
    return summary
class FitSummaryData(object):
    """Text summary (per lap and overall) of the messages in a FIT file.

    The constructor collects all 'record' messages in ``self.df`` and all
    'lap' messages in ``self.lapdf``.  Records are tagged with the number
    of lap messages seen before them (``lapid`` starting at 0); lap
    messages are tagged starting at 1, so lap ``k+1`` describes the records
    with ``lapid == k``.  ``setsummary`` appends the formatted table to
    ``self.summarytext``.
    """

    def __init__(self, readfile):
        """Parse ``readfile`` with ``FitFile`` (CRC check disabled)."""
        self.readfile = readfile
        self.fitfile = FitFile(readfile, check_crc=False)
        self.records = self.fitfile.messages
        recorddicts = []
        lapdicts = []
        lapcounter = 0
        for record in self.records:
            if record.name == 'record':
                values = record.get_values()
                values['lapid'] = lapcounter
                recorddicts.append(values)
            elif record.name == 'lap':
                lapcounter += 1
                values = record.get_values()
                values['lapid'] = lapcounter
                lapdicts.append(values)
        self.df = pd.DataFrame(recorddicts)
        self.lapdf = pd.DataFrame(lapdicts)
        self.summarytext = 'Work Details\n'

    @staticmethod
    def _safe_int(value, default=0):
        """Return ``int(value)``, or ``default`` when ``value`` is null/NaN."""
        if pd.isnull(value):
            return default
        return int(value)

    @staticmethod
    def _mean_velocity(frame):
        """Mean of 'enhanced_speed', else of 'speed', else 0."""
        for column in ('enhanced_speed', 'speed'):
            try:
                return frame[column].mean()
            except KeyError:
                continue
        return 0

    @staticmethod
    def _pace_parts(velo):
        """Split ``500/velo`` into whole minutes and seconds (one decimal).

        A missing or non-positive velocity gives ``(0, 0.0)`` instead of
        dividing by zero.
        """
        if pd.isnull(velo) or velo <= 0:
            pace = 0
        else:
            pace = 500./velo
        minutes = int(pace/60)
        seconds = int(10*(pace-minutes*60.))/10.
        return minutes, seconds

    def setsummary(self, separator="|"):
        """Append the per-lap table and the overall line to ``summarytext``.

        Args:
            separator: string placed between the columns of the table.
        """
        self.summarytext += "#-{sep}SDist{sep}-Split-{sep}-SPace-{sep}-SPM-{sep}-Pwr-{sep}AvgHR{sep}MaxHR{sep}DPS-\n".format(
            sep=separator
            )

        for lapcount, group in self.df.groupby('lapid'):
            laprow = self.lapdf[self.lapdf['lapid']==lapcount+1]
            # the NaN test has to come before int(): int(nan) raises
            intdist = self._safe_int(laprow['total_distance'].iloc[0],
                                     default=1)
            inttime = float(laprow['total_elapsed_time'].iloc[0])
            try:
                intpower = self._safe_int(group['power'].mean())
            except KeyError:
                intpower = 0
            lapmin = int(inttime/60)
            lapsec = int(int(10*(inttime-lapmin*60.))/10.)
            pacemin, pacesec = self._pace_parts(self._mean_velocity(group))
            try:
                intspm = group['cadence'].mean()
            except KeyError: # pragma: no cover
                intspm = 0
            inthr = self._safe_int(group['heart_rate'].mean())
            intmaxhr = self._safe_int(group['heart_rate'].max())
            strokecount = intspm*inttime/60.
            try:
                intdps = intdist/float(strokecount)
            except ZeroDivisionError: # pragma: no cover
                intdps = 0.0

            # pace seconds are padded to width 4 ("05.2") so the column
            # is as wide as its "-SPace-" header
            summarystring = (
                "{nr:0>2}{sep}{intdist:0>5d}{sep}"
                " {lapmin:0>2}:{lapsec:0>2} {sep}"
                "{pacemin:0>2}:{pacesec:0>4.1f}"
                "{sep} {intspm:0>4.1f}{sep}"
                " {intpower:0>3d} {sep}"
                " {inthr:0>3d} {sep}"
                " {intmaxhr:0>3d} {sep}"
                " {dps:0>3.1f}"
                "\n"
            ).format(
                nr=lapcount+1,
                sep=separator,
                intdist=intdist,
                lapmin=lapmin,
                lapsec=lapsec,
                pacemin=pacemin,
                pacesec=pacesec,
                intspm=intspm,
                intpower=intpower,
                inthr=inthr,
                intmaxhr=intmaxhr,
                dps=intdps,
            )
            self.summarytext += summarystring

        # add total summary
        timestamps = self.df['timestamp'].apply(totimestamp)
        totaltime = timestamps.max()-timestamps.min()
        minutes, sec = self._pace_parts(self._mean_velocity(self.df))
        pacestring = "{0:0>2}:{1:0>4.1f}".format(minutes, sec)
        totmin = int(totaltime/60)
        totsec = int(int(10*(totaltime-totmin*60.))/10.)
        avghr = self._safe_int(self.df['heart_rate'].mean())
        grandmaxhr = self._safe_int(self.df['heart_rate'].max())
        try:
            avgpower = self._safe_int(self.df['power'].mean())
        except KeyError:
            avgpower = 0
        try:
            avgspm = self.df['cadence'].mean()
        except KeyError: # pragma: no cover
            avgspm = 0
        totaldistance = self.df['distance'].max()-self.df['distance'].min()
        if np.isnan(totaldistance): # pragma: no cover
            totaldistance = 1
        strokecount = avgspm*totaltime/60.
        try:
            # plain floats, so a zero stroke count raises instead of
            # silently producing inf
            avgdps = float(totaldistance)/float(strokecount)
        except ZeroDivisionError: # pragma: no cover
            avgdps = 0

        summarystring = "Workout Summary\n"
        summarystring += "--{sep}{totaldistance:0>5}{sep}".format(
            totaldistance=int(totaldistance),
            sep=separator
            )
        summarystring += " {totmin:0>2}:{totsec:0>2} {sep} ".format(
            totmin=totmin,
            totsec=totsec,
            sep=separator,
            )
        summarystring += pacestring+separator
        summarystring += " {avgspm:0>4.1f}{sep}".format(
            sep=separator,
            avgspm=avgspm
            )
        summarystring += " {avgpower:0>3} {sep}".format(
            sep=separator,
            avgpower=avgpower
            )
        summarystring += " {avghr:0>3} {sep} {grandmaxhr:0>3} {sep}".format(
            avghr=avghr,
            grandmaxhr=grandmaxhr,
            sep=separator
            )
        summarystring += " {avgdps:0>3.1f}".format(
            avgdps=avgdps
            )
        self.summarytext += summarystring
class FITParser(object):
    """Load the 'record' messages of a FIT file into ``self.df``.

    Column names are reduced to lower-case ASCII, and the columns needed
    further on are renamed to the CSV naming used throughout this module
    ('TimeStamp (sec)', ' Horizontal (meters)', ' HRCur (bpm)', ...).
    """

    def __init__(self, readfile):
        """Parse ``readfile``.

        Args:
            readfile: path of a FIT file.  A name ending in '.gz' is
                decompressed first, into a file with the same name minus
                that suffix.

        Raises:
            ValueError: when the 'timestamp' column cannot be converted.
        """
        self.readfile = self._gunzip_if_needed(readfile)
        self.fitfile = FitFile(self.readfile, check_crc=False)
        self.records = self.fitfile.messages
        recorddicts = []
        lapcounter = 0
        for record in self.records:
            if record.name == 'record':
                values = record.get_values()
                # records are tagged with the number of lap messages
                # encountered before them
                values['lapid'] = lapcounter
                recorddicts.append(values)
            elif record.name == 'lap':
                lapcounter += 1
        self.df = pd.DataFrame(recorddicts)

        # columns to lowercase ASCII
        self.df.columns = [strip_non_ascii(x) for x in self.df.columns]
        self.df.columns = [x.encode('ascii','ignore') for x in self.df.columns]
        if pythonversion == 3:
            self.df.columns = [x.decode('ascii') for x in self.df.columns]
        self.df.rename(columns = str.lower,inplace=True)

        # check column dimensions: a duplicated name selects a frame
        # instead of a series; keep only its first column
        for c in self.df.columns:
            x = self.df[c]
            if len(x.shape)>1: # pragma: no cover
                newdf = pd.DataFrame({
                    c: x.iloc[:,0].values
                    })
                self.df.drop(labels=c,axis=1,inplace=True)
                self.df[c] = newdf[c]

        nr_rows = len(self.df)
        try:
            latitude = self.df['position_lat']*(180./2**31)
            longitude = self.df['position_long']*(180./2**31)
        except KeyError: # pragma: no cover
            # no coordinates: use zero-filled series (not a scalar) so the
            # distance fallback below can still index them
            latitude = pd.Series(np.zeros(nr_rows))
            longitude = pd.Series(np.zeros(nr_rows))
        try:
            distance = self.df['distance']
        except KeyError: # pragma: no cover
            distance = pd.Series(np.zeros(nr_rows))
        self.df['position_lat'] = latitude
        self.df['position_long'] = longitude

        if pd.isnull(distance).all():
            # no usable distance data: accumulate the point-to-point
            # distances returned by geo_distance instead
            lats = np.asarray(latitude)
            lons = np.asarray(longitude)
            dist2 = np.zeros(len(distance))
            for i in range(len(distance)-1):
                res = geo_distance(lats[i], lons[i], lats[i+1], lons[i+1])
                deltal = 1000.*res[0]
                dist2[i+1] = dist2[i]+deltal
            self.df['distance'] = dist2

        try:
            velo = self.df['enhanced_speed']
        except KeyError: # pragma: no cover
            try:
                velo = self.df['speed']
            except KeyError:
                velo = pd.Series(np.zeros(nr_rows))
        try:
            if velo.mean() >= 1000: # pragma: no cover
                velo = velo/1000.
        except TypeError: # pragma: no cover
            pass

        try:
            timestamps = self.df['timestamp'].apply(totimestamp)
        except AttributeError: # pragma: no cover
            # previously swallowed, which only postponed the failure to a
            # NameError a few lines further down
            raise ValueError(
                "could not convert the 'timestamp' column of "
                + str(self.readfile)
            )
        try:
            pace = 500./velo
        except TypeError: # pragma: no cover
            pace = pd.Series(np.zeros(nr_rows))
        elapsed_time = timestamps-timestamps.values[0]
        self.df['TimeStamp (sec)'] = timestamps
        self.df[' Stroke500mPace (sec/500m)'] = pace
        self.df[' ElapsedTime (sec)'] = elapsed_time

        hrname = 'heart_rate'
        spmname = 'cadence'
        if 'heart rate' in self.df.columns: # pragma: no cover
            hrname = 'heart rate'
        if 'stroke rate' in self.df.columns: # pragma: no cover
            spmname = 'stroke rate'
        newcolnames = {
            'power': ' Power (watts)',
            hrname: ' HRCur (bpm)',
            'position_long': ' longitude',
            'position_lat': ' latitude',
            spmname: ' Cadence (stokes/min)',
            'lapid': ' lapIdx',
            'distance': ' Horizontal (meters)'
            }
        self.df.rename(columns=newcolnames,inplace=True)

    @staticmethod
    def _gunzip_if_needed(readfile):
        """Return a path to uncompressed data for ``readfile``.

        A name ending in '.gz' (any case) is decompressed into a file with
        that suffix removed and the new name is returned; any other name
        is returned unchanged.
        """
        if readfile[-3:].lower() != '.gz':
            return readfile
        # strip the suffix; taking readfile[-3:] here would name the
        # output file just '.gz'
        newfile = readfile[:-3]
        with gzip.open(readfile,'rb') as f_in, open(newfile,'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
        return newfile

    def write_csv(self, writefile="fit_o.csv", gzip=False):
        """Write ``self.df`` as CSV.

        Args:
            writefile: output file name.
            gzip: when true, write gzip-compressed data to
                ``writefile + '.gz'`` instead.

        Returns:
            Whatever ``DataFrame.to_csv`` returns.
        """
        if gzip: # pragma: no cover
            return self.df.to_csv(writefile+'.gz', index_label='index',
                                  compression='gzip')
        else:
            return self.df.to_csv(writefile, index_label='index')
class JSONParser(object): # pragma: no cover
    """Load a JSON workout file into ``self.df``.

    The file must hold an object with a 'laps' list; every lap has a
    'points' list of records.  All points of all laps are stacked, in
    order, into one frame with a fresh 0..n-1 index.  The columns 'time'
    and 'hr' are renamed to 'TimeStamp (sec)' and ' HRCur (bpm)'.
    """

    def __init__(self, json_file):
        """Read ``json_file`` (a path) and build ``self.df``."""
        with open(json_file,'r') as f:
            data = json.load(f)

        lapframes = [
            pd.DataFrame.from_records(lap['points']) for lap in data['laps']
        ]
        if lapframes:
            # DataFrame.append was removed in pandas 2.0; one concat also
            # avoids copying the growing frame for every lap
            df = pd.concat(lapframes, ignore_index=True)
        else:
            df = pd.DataFrame()
        self.df = df

        newcolnames = {
            'time':'TimeStamp (sec)',
            'hr':' HRCur (bpm)',
            }
        self.df.rename(columns=newcolnames,inplace=True)

    def write_csv(self,writefile="json_o.csv", gzip = False):
        """Write ``self.df`` as CSV.

        Args:
            writefile: output file name.
            gzip: when true, write gzip-compressed data to
                ``writefile + '.gz'`` instead.

        Returns:
            Whatever ``DataFrame.to_csv`` returns.
        """
        if gzip:
            return self.df.to_csv(writefile+'.gz', index_label='index',compression='gzip')
        else:
            return self.df.to_csv(writefile, index_label='index')
class TCXParserTester(object): # pragma: no cover
    """XPath based access to the trackpoints of a TCX file.

    Only trackpoints that contain heart rate, cadence, distance and time
    elements are selected; the XPath expression doing so is kept in
    ``self.selectionstring``.
    """

    def __init__(self, tcx_file):
        """Parse ``tcx_file`` and pre-select heart rate, distance and
        cadence elements."""
        self.root = objectify.parse(tcx_file).getroot()
        self.activity = self.root.Activities.Activity

        # need to select only trackpoints with Cadence, Distance,
        # Time & HR data
        predicates = [
            '//ns:Trackpoint[descendant::ns:HeartRateBpm]',
            '[descendant::ns:Cadence]',
            '[descendant::ns:DistanceMeters]',
            '[descendant::ns:Time]',
        ]
        self.selectionstring = ''.join(predicates)

        hr_query = self.selectionstring+'//ns:HeartRateBpm/ns:Value'
        self.hr_values = self.root.xpath(hr_query,
                                         namespaces={'ns': NAMESPACE})

        distance_query = self.selectionstring+'/ns:DistanceMeters'
        self.distance_values = self.root.xpath(distance_query,
                                               namespaces={'ns': NAMESPACE})

        spm_query = self.selectionstring+'/ns:Cadence'
        self.spm_values = self.root.xpath(spm_query,
                                          namespaces={'ns': NAMESPACE})

    def getarray(self, str1, str2=''):
        """Return the elements named ``str1`` below the selected
        trackpoints, or their ``str2`` children when ``str2`` is given."""
        query = self.selectionstring+'//ns:'+str1
        if str2 != '':
            query = query+'/ns:'+str2
        return self.root.xpath(query, namespaces={'ns': NAMESPACE})
class GPXParser(object): # pragma: no cover
    """Load a GPX file into ``self.df`` via ``gpxtools.gpxtodf2``."""

    def __init__(self, gpx_file, *args, **kwargs):
        """Parse ``gpx_file``; extra arguments are accepted and ignored."""
        self.df = gpxtools.gpxtodf2(gpx_file)

    def write_csv(self, writefile='example.csv', window_size=5, gzip=False):
        """Write the data, sorted by 'TimeStamp (sec)', as CSV.

        Gaps are forward-filled.  Columns named 'Position' or 'Extensions'
        and columns whose non-null values are all zero are left out.

        Args:
            writefile: output file name.
            window_size: unused; kept for interface compatibility.
            gzip: when true, write gzip-compressed data to
                ``writefile + '.gz'`` instead.

        Returns:
            Whatever ``DataFrame.to_csv`` returns.
        """
        data = self.df.sort_values(by='TimeStamp (sec)', ascending=True)
        data = data.ffill()

        # Collect first and drop once, so a column matching two rules is
        # not dropped twice.  "All zero" is tested on the values
        # themselves: a zero mean alone also matches e.g. [-1, 0, 1].
        obsolete = []
        for c in data.columns:
            if c in ('Position', 'Extensions'):
                obsolete.append(c)
                continue
            known = data[c].dropna()
            if len(known) > 0 and (known == 0).all():
                obsolete.append(c)
        data = data.drop(obsolete, axis=1)

        if gzip:
            return data.to_csv(writefile+'.gz', index_label='index',
                               compression='gzip')
        else:
            return data.to_csv(writefile, index_label='index')
class TCXParser(object):
    """Load a TCX file into ``self.df`` using ``tcxtools``.

    Distance, stroke length and 500 m pace columns are added and the
    standard TCX column names are translated to the CSV naming used in
    this module; that translation table is kept in ``self.columns``.
    """

    def __init__(self, tcx_file, *args, **kwargs):
        """Parse ``tcx_file``.

        Args:
            tcx_file: file passed on to the ``tcxtools`` reader.
            **kwargs: ``alternative=True`` selects ``tcxtools.tcxtodf``
                instead of the default ``tcxtools.tcxtodf3``.
        """
        alternative = kwargs.get('alternative', False)
        if alternative: # pragma: no cover
            self.df = tcxtools.tcxtodf(tcx_file)
        else:
            self.df = tcxtools.tcxtodf3(tcx_file)

        try:
            lat = self.df['latitude'].apply(tofloat).values
            longitude = self.df['longitude'].apply(tofloat).values
        except KeyError: # pragma: no cover
            self.df['latitude'] = 0
            self.df['longitude'] = 0
            lat = self.df['latitude'].apply(tofloat).values
            longitude = self.df['longitude'].apply(tofloat).values

        unixtimes = self.df['timestamp'].values

        # stroke rate: 'Cadence', else 'StrokeRate', else zeros
        try:
            spm = self.df['Cadence'].apply(tofloat).values
        except KeyError: # pragma: no cover
            try:
                spm = self.df['StrokeRate'].apply(tofloat).values
                self.df['Cadence'] = self.df['StrokeRate']
            except KeyError:
                try:
                    spm = 0.0*self.df['Speed'].apply(tofloat).values
                except KeyError:
                    spm = 0.0*unixtimes

        try:
            velo = self.df['Speed'].apply(tofloat)
            dist2 = self.df['DistanceMeters'].apply(tofloat)
            strokelength = velo*60./spm
        except KeyError: # pragma: no cover
            # no speed/distance columns: derive them from the coordinates
            nr_rows = len(lat)
            dist2 = np.zeros(nr_rows)
            velo = np.zeros(nr_rows)
            strokelength = np.zeros(nr_rows)
            for i in range(nr_rows-1):
                res = geo_distance(lat[i], longitude[i], lat[i+1], longitude[i+1])
                deltal = 1000.*res[0]
                dist2[i+1] = dist2[i]+deltal
                try:
                    velo[i+1] = deltal/(1.0*(unixtimes[i+1]-unixtimes[i]))
                except ZeroDivisionError:
                    velo[i+1] = velo[i]
                if spm[i] != 0:
                    strokelength[i] = deltal*60/spm[i]
                else: # pragma: no cover
                    strokelength[i] = 0.

        try:
            power = self.df['Watts']
        except KeyError: # pragma: no cover
            try:
                power = self.df['ns3:Watts']
            except KeyError:
                power = 0*spm
            self.df['Watts'] = power

        p = 500./velo
        self.df[' Horizontal (meters)'] = dist2
        self.df[' StrokeDistance (meters)'] = strokelength
        self.df[' Stroke500mPace (sec/500m)'] = p

        # translate from standard TCX names to our naming convention
        self.columns = {
            'timestamp':'TimeStamp (sec)',
            'Cadence': ' Cadence (stokes/min)',
            'HeartRateBpm' : ' HRCur (bpm)',
            'Watts': ' Power (watts)',
            'lapid': ' lapIdx',
            'latitude': ' latitude',
            'longitude': ' longitude',
        }
        self.df.rename(columns=self.columns, inplace=True)

        for c in self.columns.values():
            # the renamed lap column carries a leading space; comparing
            # with 'lapIdx' never matched, so the exclusion had no effect
            if c == ' lapIdx':
                continue
            try:
                self.df[c] = self.df[c].astype(float)
            except KeyError: # pragma: no cover
                pass

    def write_csv(self, writefile='example.csv', window_size=5, gzip=False):
        """Write the data, sorted by 'TimeStamp (sec)', as CSV.

        Gaps are forward-filled.  Columns named 'Position' or 'Extensions'
        and columns whose non-null values are all zero are left out.

        Args:
            writefile: output file name.
            window_size: unused; kept for interface compatibility.
            gzip: when true, write gzip-compressed data to
                ``writefile + '.gz'`` instead.

        Returns:
            Whatever ``DataFrame.to_csv`` returns.
        """
        data = self.df.sort_values(by='TimeStamp (sec)', ascending=True)
        data = data.ffill()

        # Collect first and drop once, so a column matching two rules is
        # not dropped twice.  "All zero" is tested on the values
        # themselves: a zero mean alone also matches e.g. [-1, 0, 1].
        obsolete = []
        for c in data.columns:
            if c in ('Position', 'Extensions'): # pragma: no cover
                obsolete.append(c)
                continue
            known = data[c].dropna()
            if len(known) > 0 and (known == 0).all():
                obsolete.append(c)
        data = data.drop(obsolete, axis=1)

        if gzip: # pragma: no cover
            return data.to_csv(writefile+'.gz', index_label='index',
                               compression='gzip')
        else:
            return data.to_csv(writefile, index_label='index')