mobilizr-python 0.94__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mobilizr_python-0.94/MANIFEST.in +1 -0
- mobilizr_python-0.94/PKG-INFO +16 -0
- mobilizr_python-0.94/README.md +30 -0
- mobilizr_python-0.94/mobilizr-python/__init__.py +45 -0
- mobilizr_python-0.94/mobilizr-python/datasets/__init__.py +7 -0
- mobilizr_python-0.94/mobilizr-python/datasets/atu_clean.py +41 -0
- mobilizr_python-0.94/mobilizr-python/datasets/atu_dirty.py +43 -0
- mobilizr_python-0.94/mobilizr-python/datasets/cdc.py +61 -0
- mobilizr_python-0.94/mobilizr-python/datasets/food_ids.py +8 -0
- mobilizr_python-0.94/mobilizr-python/datasets/timeuse_ids.py +8 -0
- mobilizr_python-0.94/mobilizr-python/datasets/timeuse_ids_clean.py +8 -0
- mobilizr_python-0.94/mobilizr_python.egg-info/PKG-INFO +16 -0
- mobilizr_python-0.94/mobilizr_python.egg-info/SOURCES.txt +16 -0
- mobilizr_python-0.94/mobilizr_python.egg-info/dependency_links.txt +1 -0
- mobilizr_python-0.94/mobilizr_python.egg-info/requires.txt +1 -0
- mobilizr_python-0.94/mobilizr_python.egg-info/top_level.txt +1 -0
- mobilizr_python-0.94/setup.cfg +4 -0
- mobilizr_python-0.94/setup.py +19 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
include mobilizr-python/data/*.csv
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mobilizr-python
|
|
3
|
+
Version: 0.94
|
|
4
|
+
Summary: A library of custom datasets.
|
|
5
|
+
Home-page: https://github.com/emilio-dulay/my_datasets_lib
|
|
6
|
+
Author: Emilio Dulay
|
|
7
|
+
Author-email: emiliodulay19@g.ucla.edu
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Requires-Dist: pandas
|
|
11
|
+
Dynamic: author
|
|
12
|
+
Dynamic: author-email
|
|
13
|
+
Dynamic: classifier
|
|
14
|
+
Dynamic: home-page
|
|
15
|
+
Dynamic: requires-dist
|
|
16
|
+
Dynamic: summary
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# UCLA IDS Data Library
|
|
2
|
+
|
|
3
|
+
A lightweight Python library for loading example datasets used in data science courses.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
Clone the repository and install locally:
|
|
10
|
+
git clone https://github.com/EmilioD19/UCLA-IDS-Data-Library.git
|
|
11
|
+
cd UCLA-IDS-Data-Library
|
|
12
|
+
pip install .
|
|
13
|
+
|
|
14
|
+
## Usage
|
|
15
|
+
Import functions:
|
|
16
|
+
from my_datasets_lib import data, describe, list_datasets
|
|
17
|
+
|
|
18
|
+
## Load a dataset
|
|
19
|
+
df = data("atu_clean")
|
|
20
|
+
print(df.head())
|
|
21
|
+
|
|
22
|
+
## Describe a dataset
|
|
23
|
+
print(describe("atu_clean"))
|
|
24
|
+
|
|
25
|
+
## List available datasets
|
|
26
|
+
print(list_datasets())
|
|
27
|
+
|
|
28
|
+
## Error Handling
|
|
29
|
+
If a dataset does not exist, ValueError is raised:
|
|
30
|
+
data("nonexistent")
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
from . import datasets
|
|
2
|
+
import pandas as pd
|
|
3
|
+
|
|
4
|
+
def data(name):
    """Load a dataset by name.

    Args:
        name: Name of a dataset module exposed by the ``datasets`` package.

    Returns:
        Whatever the dataset module's ``load()`` function returns.

    Raises:
        ValueError: If ``name`` is not an attribute of ``datasets``, or is a
            private/dunder name.
    """
    module = getattr(datasets, name, None)
    # Names starting with "_" (e.g. "__name__", "__doc__") are attributes of
    # the package object itself, not datasets. list_datasets() hides them, so
    # reject them here rather than failing later with an AttributeError on
    # ``.load()``.
    if module is None or name.startswith("_"):
        # Report the package that was actually searched instead of a
        # hard-coded name.
        raise ValueError(f"Dataset '{name}' not found in {datasets.__name__}")
    return module.load()
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def describe(name):
    """Get the description for a dataset by name.

    Args:
        name: Name of a dataset module exposed by the ``datasets`` package.

    Returns:
        Whatever the dataset module's ``describe()`` function returns.

    Raises:
        ValueError: If ``name`` is not an attribute of ``datasets``, or is a
            private/dunder name.
    """
    module = getattr(datasets, name, None)
    # Private/dunder attributes of the package (e.g. "__file__") are not
    # datasets; treat them as unknown instead of raising AttributeError when
    # calling ``.describe()`` on them.
    if module is None or name.startswith("_"):
        # Report the package that was actually searched instead of a
        # hard-coded name.
        raise ValueError(f"Dataset '{name}' not found in {datasets.__name__}")
    return module.describe()
|
|
18
|
+
|
|
19
|
+
def list_datasets():
    """Return the public attribute names of the ``datasets`` package.

    Every name reported by ``dir(datasets)`` is included except those that
    begin with an underscore.
    """
    return [attr for attr in dir(datasets) if attr[:1] != "_"]
|
|
22
|
+
|
|
23
|
+
def View(name):
    """Return a dataset by name, checking that it is a pandas DataFrame.

    Args:
        name: Name of a dataset module exposed by the ``datasets`` package.

    Returns:
        The DataFrame produced by the dataset module's ``load()`` function.

    Raises:
        ValueError: If ``name`` is not an attribute of ``datasets``, or is a
            private/dunder name.
        TypeError: If the loaded object is not a ``pandas.DataFrame``.
    """
    module = getattr(datasets, name, None)
    # Private/dunder attributes of the package are not datasets; reject them
    # up front so the caller gets the ValueError rather than an
    # AttributeError from ``.load()``.
    if module is None or name.startswith("_"):
        # Report the package that was actually searched instead of a
        # hard-coded name.
        raise ValueError(f"Dataset '{name}' not found in {datasets.__name__}")

    df = module.load()
    if not isinstance(df, pd.DataFrame):
        raise TypeError(f"Dataset '{name}' is not a pandas DataFrame.")
    return df
|
|
33
|
+
|
|
34
|
+
def timeuse_format(df):
    """Summarise time-use submissions per user.

    Every "NOT_DISPLAYED" value is replaced with 0, and all columns other
    than 'user.id', 'timestamp' and 'activities' are converted with
    ``pd.to_numeric``. The rows are then grouped by 'user.id'.

    Returns:
        The per-user means of the numeric columns merged (on 'user.id') with
        a "submissions" column holding each user's row count.
    """
    cleaned = df.replace("NOT_DISPLAYED", 0)

    # Columns that carry measurements (everything except the identifiers).
    measure_cols = cleaned.columns.difference(['user.id', 'timestamp', 'activities'])
    cleaned[measure_cols] = cleaned[measure_cols].apply(pd.to_numeric)

    per_user = cleaned.groupby('user.id')
    counts = per_user.size().rename("submissions")
    means = per_user.mean(numeric_only=True)

    return means.merge(counts, on="user.id")
|
|
44
|
+
|
|
45
|
+
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import pandas as pd
|
|
3
|
+
|
|
4
|
+
def load():
    """Read atu_clean.csv into a pandas DataFrame.

    The file is looked up in the ``data`` directory one level above the
    directory containing this module.
    """
    parent_dir = os.path.join(os.path.dirname(__file__), '..')
    return pd.read_csv(os.path.join(parent_dir, 'data', 'atu_clean.csv'))
|
|
9
|
+
|
|
10
|
+
def describe():
    """Return a description of the atu_clean dataset.

    Returns:
        The help text as a single string. Leading and trailing whitespace is
        stripped so the result has the same shape as the descriptions
        returned by the other dataset modules (atu_dirty, cdc).
    """
    return '''
American Time Use Survey Data Sample - Clean
Description
A dataset containing a subset of variables from the American Time Use Survey. This dataset is a cleaned version of atu_dirty.

Usage
data(atu_clean)
Format
A data frame with 10,493 observations of 8 variables

Details
caseid. unique identifier of individual survey participant

age. the age of the respondent

sex. the sex of the respondent

fulltime_emp. the employment status of the respondent

phys_challenge. does the respondent have a physical difficulty

sleep. the length of time the person sleeps, in minutes

homework. How long the respondent spent on homework assignments, in minutes

socializing. the number of minutes the respondent spent socializing

Source
http://www.bls.gov/tus/
'''.strip()
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import pandas as pd
|
|
3
|
+
|
|
4
|
+
def load():
    """Read atu_dirty.csv into a pandas DataFrame.

    The file is looked up in the ``data`` directory one level above the
    directory containing this module.
    """
    parent_dir = os.path.join(os.path.dirname(__file__), '..')
    return pd.read_csv(os.path.join(parent_dir, 'data', 'atu_dirty.csv'))
|
|
9
|
+
|
|
10
|
+
def describe():
    """Return a description of the atu_dirty dataset.

    Returns:
        The help text as a single string with leading and trailing
        whitespace stripped.
    """
    # The string literal must start on the same line as ``return``: a bare
    # ``return`` followed by the literal on the next line returns None and
    # leaves the text as unreachable code.
    return '''
American Time Use Survey Data Sample - Dirty
Description
A dataset containing a subset of variables from the American Time Use Survey. This dataset is "dirty", meaning it has elements which require formatting before use.

Usage
data(atu_dirty)
Format
A data frame with 10,493 observations of 8 variables

Details
caseid. unique identifier of individual survey participant

V1. the age of the respondent

V2. the gender of the respondent (1: Male, 2: Female)

V3. the employment status of the respondent

V4. does the respondent have a physical difficulty (1: Person did not report having a physical difficulty, 2: Person surveyed reported the have a physical difficulty)

V5. the length of time the person sleeps, in minutes

V6. How long the respondent spent on homework assignments, in minutes

V7. the number of minutes the respondent spent socializing

Source
http://www.bls.gov/tus/
'''.strip()
|
|
43
|
+
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import pandas as pd
|
|
3
|
+
|
|
4
|
+
def load():
    """Read cdc.csv into a pandas DataFrame.

    The file is looked up in the ``data`` directory one level above the
    directory containing this module.
    """
    parent_dir = os.path.join(os.path.dirname(__file__), '..')
    return pd.read_csv(os.path.join(parent_dir, 'data', 'cdc.csv'))
|
|
9
|
+
|
|
10
|
+
def describe():
    """Return the help text for the CDC dataset.

    The text is returned as one string with surrounding whitespace removed.
    """
    help_text = """
CDC Youth Risk Behavior Survey Data

**Description**
A dataset containing responses from the 2021 CDC Youth Risk Behavior Survey.

**Usage**
`data(cdc)`

**Format**
A data frame with 17,232 observations of 32 variables.

**Details**
- **age** — age in years
- **sex** — sex assigned at birth
- **grade** — grade in school
- **height** — height of student in meters
- **weight** — weight of student in kilograms
- **seat_belt** — how often student wore a seatbelt in a motor vehicle driven by someone else
- **drive_text** — how often the student reported texting while driving in the past 30 days
- **hisp_latino** — whether or not student identifies as Hispanic or Latino
- **american_indian_or_alaska_native** — whether or not student identifies as American Indian or Alaska Native
- **asian** — whether or not student identifies as Asian
- **black_or_african_american** — whether or not student identifies as Black or African American
- **native_hawaiian_or_other_pacific_islander** — whether or not student identifies as Native Hawaiian or other Pacific Islander
- **white** — whether or not student identifies as White
- **bully_school** — did the student report being bullied at school
- **bully_electronic** — did the student report being bullied online
- **depressed** — student reported feeling depressed for 2 weeks in a row or more during the past 12 months
- **days_smoking** — number of days student reported smoking cigarettes during past 30 days
- **days_vaping** — number of days student reported vaping/smoking electronic cigarettes during past 30 days
- **sexuality** — how the student describes their sexual orientation
- **describe_weight** — student perception of their weight relative to what they believe it should be
- **drink_juice** — how often student consumed fruit juice over the previous 7 days
- **eat_fruit** — how often student ate fruit over the previous 7 days
- **eat_salad** — how often student ate salad over the previous 7 days
- **drink_soda** — how often student consumed soda over the previous 7 days
- **drink_milk** — how often student drank milk over the previous 7 days
- **eat_breakfast** — how often the student reported eating breakfast over the past 7 days
- **days_exercise_60** — how often student was active for at least 60 mins over the previous 7 days
- **screen_time** — average number of hours spent on a screen on a school day
- **number_teams** — number of sports teams played on during previous 12 months
- **hours_sleep** — reported hours of sleep on school nights
- **drink_sportdrink** — how often student consumed sports drinks over the past 7 days
- **drink_water** — how often student consumed water over the past 7 days

**Source**
http://www.cdc.gov/HealthyYouth/yrbs/index.htm
"""
    return help_text.strip()
|
|
61
|
+
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mobilizr-python
|
|
3
|
+
Version: 0.94
|
|
4
|
+
Summary: A library of custom datasets.
|
|
5
|
+
Home-page: https://github.com/emilio-dulay/my_datasets_lib
|
|
6
|
+
Author: Emilio Dulay
|
|
7
|
+
Author-email: emiliodulay19@g.ucla.edu
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Requires-Dist: pandas
|
|
11
|
+
Dynamic: author
|
|
12
|
+
Dynamic: author-email
|
|
13
|
+
Dynamic: classifier
|
|
14
|
+
Dynamic: home-page
|
|
15
|
+
Dynamic: requires-dist
|
|
16
|
+
Dynamic: summary
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
MANIFEST.in
|
|
2
|
+
README.md
|
|
3
|
+
setup.py
|
|
4
|
+
mobilizr-python/__init__.py
|
|
5
|
+
mobilizr-python/datasets/__init__.py
|
|
6
|
+
mobilizr-python/datasets/atu_clean.py
|
|
7
|
+
mobilizr-python/datasets/atu_dirty.py
|
|
8
|
+
mobilizr-python/datasets/cdc.py
|
|
9
|
+
mobilizr-python/datasets/food_ids.py
|
|
10
|
+
mobilizr-python/datasets/timeuse_ids.py
|
|
11
|
+
mobilizr-python/datasets/timeuse_ids_clean.py
|
|
12
|
+
mobilizr_python.egg-info/PKG-INFO
|
|
13
|
+
mobilizr_python.egg-info/SOURCES.txt
|
|
14
|
+
mobilizr_python.egg-info/dependency_links.txt
|
|
15
|
+
mobilizr_python.egg-info/requires.txt
|
|
16
|
+
mobilizr_python.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
pandas
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
mobilizr-python
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from setuptools import setup, find_packages
|
|
2
|
+
|
|
3
|
+
# Build configuration for the mobilizr-python distribution.
setup(
    name='mobilizr-python',
    version='0.94',
    packages=find_packages(),
    include_package_data=True,
    # The dataset loaders read CSV files from a "data" directory inside the
    # package (one level above datasets/). Declare them explicitly so they
    # are shipped with the package; the '' key applies the pattern to every
    # package found, relative to that package's own directory.
    package_data={
        '': ['data/*.csv'],
    },
    install_requires=[
        'pandas',
    ],
    description='A library of custom datasets.',
    author='Emilio Dulay',
    author_email='emiliodulay19@g.ucla.edu',
    url="https://github.com/emilio-dulay/my_datasets_lib",
    classifiers=[
        'Programming Language :: Python :: 3',
        'License :: OSI Approved :: MIT License',
    ],
)
|