mobts 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. mobts-0.1.0/LICENSE +21 -0
  2. mobts-0.1.0/PKG-INFO +80 -0
  3. mobts-0.1.0/README.md +48 -0
  4. mobts-0.1.0/pyproject.toml +77 -0
  5. mobts-0.1.0/setup.cfg +4 -0
  6. mobts-0.1.0/src/mobts/__init__.py +4 -0
  7. mobts-0.1.0/src/mobts/configs/__init__.py +0 -0
  8. mobts-0.1.0/src/mobts/configs/config_common.py +29 -0
  9. mobts-0.1.0/src/mobts/configs/config_imputation.py +64 -0
  10. mobts-0.1.0/src/mobts/configs/config_preprocessing.py +70 -0
  11. mobts-0.1.0/src/mobts/divider/__init__.py +29 -0
  12. mobts-0.1.0/src/mobts/divider/divider.py +55 -0
  13. mobts-0.1.0/src/mobts/divider/divider_error.py +61 -0
  14. mobts-0.1.0/src/mobts/imputation/__init__.py +2 -0
  15. mobts-0.1.0/src/mobts/imputation/donors.py +317 -0
  16. mobts-0.1.0/src/mobts/imputation/pipeline.py +254 -0
  17. mobts-0.1.0/src/mobts/imputation/selector.py +296 -0
  18. mobts-0.1.0/src/mobts/imputation/stl.py +279 -0
  19. mobts-0.1.0/src/mobts/main.py +32 -0
  20. mobts-0.1.0/src/mobts/preprocessing/__init__.py +1 -0
  21. mobts-0.1.0/src/mobts/preprocessing/cleaning.py +296 -0
  22. mobts-0.1.0/src/mobts/preprocessing/outliers.py +196 -0
  23. mobts-0.1.0/src/mobts/preprocessing/pipeline.py +361 -0
  24. mobts-0.1.0/src/mobts/preprocessing/plotting.py +204 -0
  25. mobts-0.1.0/src/mobts/utils/__init__.py +0 -0
  26. mobts-0.1.0/src/mobts/utils/formatting.py +208 -0
  27. mobts-0.1.0/src/mobts.egg-info/PKG-INFO +80 -0
  28. mobts-0.1.0/src/mobts.egg-info/SOURCES.txt +29 -0
  29. mobts-0.1.0/src/mobts.egg-info/dependency_links.txt +1 -0
  30. mobts-0.1.0/src/mobts.egg-info/requires.txt +20 -0
  31. mobts-0.1.0/src/mobts.egg-info/top_level.txt +1 -0
mobts-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 IFPEN
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
mobts-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,80 @@
1
+ Metadata-Version: 2.4
2
+ Name: mobts
3
+ Version: 0.1.0
4
+ Summary: Mobility time series package for preprocessing, imputing, and analyzing mobility count observation data
5
+ Author: Ali SHATERI BENAM
6
+ Project-URL: Documentation, https://mobidec.github.io/mobts/index.html
7
+ Project-URL: Repository, https://github.com/Mobidec/mobts.git
8
+ Project-URL: Issues, https://github.com/Mobidec/mobts/issues
9
+ Requires-Python: >=3.10
10
+ Description-Content-Type: text/markdown
11
+ License-File: LICENSE
12
+ Requires-Dist: pandas
13
+ Requires-Dist: numpy
14
+ Requires-Dist: statsmodels
15
+ Requires-Dist: scikit-learn
16
+ Requires-Dist: matplotlib
17
+ Provides-Extra: dev
18
+ Requires-Dist: pytest; extra == "dev"
19
+ Requires-Dist: pytest-cov; extra == "dev"
20
+ Requires-Dist: pre-commit; extra == "dev"
21
+ Requires-Dist: build; extra == "dev"
22
+ Requires-Dist: twine; extra == "dev"
23
+ Requires-Dist: ruff; extra == "dev"
24
+ Requires-Dist: sphinx; extra == "dev"
25
+ Requires-Dist: sphinx_design; extra == "dev"
26
+ Requires-Dist: furo; extra == "dev"
27
+ Requires-Dist: sphinx_autopackagesummary; extra == "dev"
28
+ Requires-Dist: sphinxcontrib-napoleon; extra == "dev"
29
+ Requires-Dist: numpydoc; extra == "dev"
30
+ Requires-Dist: myst_nb; extra == "dev"
31
+ Dynamic: license-file
32
+
33
+ # mobts
34
+
35
+ > Python package for preprocessing and imputing urban mobility time series data.
36
+
37
+ Designed for transport datasets such as bike counts, traffic loops, and station-based observations.
38
+
39
+ ---
40
+
41
+ ## What this package does
42
+
43
+ - Clean time series data:
44
+ + Detects measurement errors
45
+ + Flags and removes them
46
+
47
+ - Imputes missing/invalid data based on a multi-tier method
48
+
49
+ ---
50
+
51
+ ## Installation
52
+
53
+ ```bash
54
+ pip install mobts
55
+ ```
56
+ ---
57
+
58
+ ## Example of running the code
59
+ ```python
60
+ from mobts import preprocess
61
+ from mobts import impute
62
+
63
+ # Step 1: clean raw data
64
+ pp = preprocess()
65
+ df_clean = pp.run(df)
66
+
67
+ # Step 2: impute missing values
68
+ imp = impute()
69
+ df_imputed = imp.run(df_clean)
70
+ ```
71
+ ---
72
+
73
+ ### Functional examples
74
+
75
+ Full step-by-step examples are available in:
76
+ - notebooks/demo_preprocessing_imputation.ipynb
77
+
78
+ ## License
79
+
80
+ This project is licensed under the MIT License, which means it is freely usable for personal and commercial purposes. The MIT License is one of the most permissive open source licenses. It allows you to do almost anything with the source code, as long as you retain the original license notice and copyright information when redistributing the software or substantial portions of it. This license comes without any warranties, so the software is provided "as is." For more details, please refer to the included LICENSE file.
mobts-0.1.0/README.md ADDED
@@ -0,0 +1,48 @@
1
+ # mobts
2
+
3
+ > Python package for preprocessing and imputing urban mobility time series data.
4
+
5
+ Designed for transport datasets such as bike counts, traffic loops, and station-based observations.
6
+
7
+ ---
8
+
9
+ ## What this package does
10
+
11
+ - Clean time series data:
12
+ + Detects measurement errors
13
+ + Flags and removes them
14
+
15
+ - Imputes missing/invalid data based on a multi-tier method
16
+
17
+ ---
18
+
19
+ ## Installation
20
+
21
+ ```bash
22
+ pip install mobts
23
+ ```
24
+ ---
25
+
26
+ ## Example of running the code
27
+ ```python
28
+ from mobts import preprocess
29
+ from mobts import impute
30
+
31
+ # Step 1: clean raw data
32
+ pp = preprocess()
33
+ df_clean = pp.run(df)
34
+
35
+ # Step 2: impute missing values
36
+ imp = impute()
37
+ df_imputed = imp.run(df_clean)
38
+ ```
39
+ ---
40
+
41
+ ### Functional examples
42
+
43
+ Full step-by-step examples are available in:
44
+ - notebooks/demo_preprocessing_imputation.ipynb
45
+
46
+ ## License
47
+
48
+ This project is licensed under the MIT License, which means it is freely usable for personal and commercial purposes. The MIT License is one of the most permissive open source licenses. It allows you to do almost anything with the source code, as long as you retain the original license notice and copyright information when redistributing the software or substantial portions of it. This license comes without any warranties, so the software is provided "as is." For more details, please refer to the included LICENSE file.
@@ -0,0 +1,77 @@
1
+ [project]
2
+ name = "mobts"
3
+ version = "0.1.0"
4
+ description = "Mobility time series package for preprocessing, imputing, and analyzing mobility count observation data"
5
+ readme = "README.md"
6
+ requires-python = ">=3.10"
7
+ authors = [
8
+ { name = "Ali SHATERI BENAM" }
9
+ ]
10
+ dependencies = [
11
+ "pandas",
12
+ "numpy",
13
+ "statsmodels",
14
+ "scikit-learn",
15
+ "matplotlib"
16
+ ]
17
+
18
+ [build-system]
19
+ requires = ["setuptools>=61"]
20
+ build-backend = "setuptools.build_meta"
21
+
22
+ [project.urls]
23
+ Documentation = "https://mobidec.github.io/mobts/index.html"
24
+ Repository = "https://github.com/Mobidec/mobts.git"
25
+ Issues = "https://github.com/Mobidec/mobts/issues"
26
+
27
+ [project.optional-dependencies]
28
+ dev = [
29
+ "pytest",
30
+ "pytest-cov",
31
+ "pre-commit",
32
+ "build",
33
+ "twine",
34
+ "ruff",
35
+ "sphinx",
36
+ "sphinx_design",
37
+ "furo",
38
+ "sphinx_autopackagesummary",
39
+ "sphinxcontrib-napoleon",
40
+ "numpydoc",
41
+ "myst_nb",
42
+ ]
43
+
44
+ [tool.setuptools.packages.find]
45
+ where = ["src"]
46
+
47
+ [tool.setuptools.package-data]
48
+ "mobts" = []
49
+
50
+ [tool.setuptools]
51
+ py-modules = []
52
+
53
+ [tool.ruff]
54
+ line-length = 140
55
+ exclude = []
56
+
57
+ [tool.ruff.lint]
58
+ extend-select = [
59
+ "UP",
60
+ "E501",
61
+ "I",
62
+ "B",
63
+ "F",
64
+ "E",
65
+ "N",
66
+ "A",
67
+ "PL",
68
+ "D"
69
+ ]
70
+
71
+ [tool.ruff.lint.pydocstyle]
72
+ convention = "numpy"
73
+
74
+ [tool.ruff.format]
75
+ quote-style = "single"
76
+ indent-style = "space"
77
+ docstring-code-format = true
mobts-0.1.0/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,4 @@
1
+ from .main import hello_world
2
+
3
+ from .preprocessing import run_preprocess_stage_1, apply_threshold, preprocess
4
+ from .imputation import impute
File without changes
@@ -0,0 +1,29 @@
1
+ from dataclasses import dataclass, field
2
+ from typing import Optional
3
+
4
+
5
@dataclass
class ColumnsConfig:
    """
    Names of the columns the pipeline works with once data is standardized.

    Every attribute is a plain string holding a column name; override a
    default when the input frame uses a different name for that column.
    """

    # Observation columns.
    counter: str = 'name'
    timestamp: str = 'timestamp'
    count: str = 'count'

    # Presumably calendar features derived from the timestamp — TODO confirm.
    weekday: str = 'weekday'
    week_num: str = 'week_num'
    how: str = 'how'  # NOTE(review): looks like "hour of week" — confirm
    hour: str = 'hour'
    date: str = 'date'
20
+
21
+
22
@dataclass
class SparsityConfig:
    """
    Settings controlling the removal of counters with too few valid counts.
    """

    # Switch for dropping sparse counters.
    drop_sparse_counters: bool = True
    # Sparsity cut-off; NOTE(review): presumably a fraction in [0, 1] — confirm
    # against the code that consumes it.
    sparse_threshold: float = 0.5
@@ -0,0 +1,64 @@
1
+ from dataclasses import dataclass
2
+ from typing import Optional
3
+
4
+
5
@dataclass
class STLConfig:
    """
    Config used in STL imputation.

    All settings are real dataclass fields, so each one can be overridden per
    instance (``STLConfig(stl_robust=True)``). The two rolling-median fields
    come first so that existing positional construction keeps its meaning;
    the remaining settings were previously unannotated class attributes and
    are still readable on the class itself (``STLConfig.stl_season_daily``).
    """

    # rolling median
    rolling_median_window: int = 2
    rolling_median_min_valid: int = 1

    # STL seasonal period, one value per data resolution
    stl_season_daily: int = 7
    stl_season_hourly: int = 168

    # clipping
    clip_lower: float = 0

    # STL robust
    stl_robust: bool = False
24
+
25
+
26
@dataclass
class DonorsConfig:
    """
    Configs for donor-based imputation.

    Every setting is an annotated dataclass field and can therefore be
    overridden per instance; the defaults remain readable on the class.

    Notes
    -----
    The ``*_hour`` / ``*_hours`` defaults are computed once, at class
    definition, from the default day counts. Overriding a day-based value on
    an instance does NOT recompute its hourly counterpart; pass both
    explicitly when they must stay in sync.
    """

    top_k_donor: int = 25
    max_donor_rate: float = 0.5

    # scaled median
    sm_min_overlap_day: int = 60
    sm_min_overlap_hour: int = sm_min_overlap_day * 24
    sm_min_neighbors: int = 20

    # regression
    min_mutual_days: int = 60
    min_mutual_hours: int = min_mutual_days * 24
    min_pred_days: int = 30
    min_pred_hours: int = min_pred_days * 24
    min_pred_coverage: float = 0.9
46
+
47
+
48
@dataclass
class OutputConfig:
    """
    Configs for output columns and final selection.

    ``col_intp`` and ``col_stl_imputed`` used to be unannotated class
    attributes, which ``@dataclass`` does not treat as fields. They are now
    proper fields, placed after the pre-existing ones so that positional
    construction keeps its original meaning.
    """

    # calculated column names
    col_sm_imputed: str = 'count_sm_imputed'
    col_reg_imputed: str = 'count_reg_imputed'
    col_final: str = 'count_imputed'
    col_method_used: str = 'imputation_method'

    # method labels
    stl_method: str = 'STL'
    sm_method: str = 'M7'
    reg_method: str = 'M8'

    # calculated column names that were previously not configurable
    col_intp: str = 'count_intp'
    col_stl_imputed: str = 'count_stl_imputed'
@@ -0,0 +1,70 @@
1
+ from dataclasses import dataclass, field
2
+ from typing import Optional
3
+ from .config_common import ColumnsConfig, SparsityConfig
4
+
5
+
6
@dataclass
class PreprocessConfig:
    """
    Settings for low-count / zero-run cleaning and operational window trimming.

    NOTE(review): earlier notes on this class referred to ``avail_min_valid``
    and ``avail_window`` (a window lacking ``avail_min_valid`` days being
    marked non-operational); neither name is an attribute here — confirm
    where they are defined.
    """

    # Low-count noise detection on daily data.
    low_rel_daily: float = 0.01  # relative threshold: fraction of the station median
    low_abs_daily: float = 5  # absolute floor below which a count is low-count noise
    low_run_min_daily: int = 2  # number of consecutive low-count days that get set to NaN

    zero_rate_max: float = 0.05  # zero rate threshold for treating 0s as normal

    # NOTE(review): no annotation, so this is a plain class attribute rather
    # than a dataclass field — the one list object is shared by all instances
    # and cannot be passed to the constructor.
    night_hours = [1, 2, 3, 4, 5, 6]

    zero_run_min: int = 6
    island_max_len: int = 6
    surround_min_len: int = 12
24
+
25
+
26
@dataclass
class STLConfig:
    """
    Settings for the STL decomposition used to score outliers.
    """

    # Seasonal period expressed in days; the default corresponds to 4 weeks.
    period: int = 28
    # Kept off by default to avoid heavy computation.
    robust: bool = False
34
+
35
+
36
@dataclass
class OutlierConfig:
    """
    Thresholds applied to the STL outlier scores.

    Both defaults are starting points meant to be tuned via plotting.
    """

    threshold_daily: float = 20
    threshold_hourly: float = 45
44
+
45
+
46
@dataclass
class PlotConfig:
    """
    Settings for plotting the detected outliers.
    """

    # Layout / figure size.
    ncols: int = 3
    figsize_width: float = 15
    min_fig_height: float = 10
    height_per_row: float = 3

    # Line and marker appearance.
    # NOTE(review): the ``_d`` / ``_h`` suffixes presumably mean daily /
    # hourly series — confirm against the plotting code.
    linewidth_d: float = 0.5
    linewidth_h: float = 0.3
    marker_size: float = 10
    x_label_rotation: int = 30

    # ``None`` by default; presumably means "no limit" — TODO confirm.
    max_stations: Optional[int] = None
61
+
62
+
63
@dataclass
class PipelineConfig:
    """
    Bundle of all preprocessing configuration sections.

    Each section defaults to a freshly constructed instance of its config
    class (via ``default_factory``), so separate ``PipelineConfig`` objects
    never share section instances.
    """

    # Shared sections (classes imported from ``config_common``).
    cols: ColumnsConfig = field(default_factory=ColumnsConfig)
    sparse: SparsityConfig = field(default_factory=SparsityConfig)

    # Preprocessing-specific sections.
    preprocess: PreprocessConfig = field(default_factory=PreprocessConfig)
    stl: STLConfig = field(default_factory=STLConfig)
    outliers: OutlierConfig = field(default_factory=OutlierConfig)
    plot: PlotConfig = field(default_factory=PlotConfig)
@@ -0,0 +1,29 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Module for division operations and custom exceptions.
5
+
6
+ This module provides functions and exceptions related to division operations.
7
+ It imports the `divide` function and the `CantDivideByZeroError` exception from
8
+ other modules and makes them available for use in this module.
9
+
10
+ Functions
11
+ ---------
12
+ divide(a, b)
13
+ Divide two numbers, raising a custom exception if the divisor is zero.
14
+
15
+ Exceptions
16
+ ----------
17
+ CantDivideByZeroError
18
+ Raised when an attempt is made to divide by zero.
19
+
20
+ Imports
21
+ --------
22
+ - divide: Function for performing division operations.
23
+ - CantDivideByZeroError: Exception raised for division by zero errors.
24
+ """
25
+
26
+ from .divider import divide
27
+ from .divider_error import CantDivideByZeroError
28
+
29
+ __all__ = ['divide', 'CantDivideByZeroError']
@@ -0,0 +1,55 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Module for division operations with custom exceptions.
5
+
6
+ This module provides a function for performing division
7
+ and raises a custom exception when attempting to divide by zero.
8
+
9
+ Functions
10
+ ---------
11
+ divide(a, b)
12
+ Divide two numbers, raising a custom exception if the divisor is zero.
13
+
14
+ Exceptions
15
+ ----------
16
+ CantDivideByZeroError
17
+ Raised when an attempt is made to divide by zero.
18
+ """
19
+
20
+ from .divider_error import CantDivideByZeroError
21
+
22
+
23
def divide(a, b):
    """
    Return the quotient of ``a`` by ``b``, rejecting a zero divisor.

    Parameters
    ----------
    a : float
        Dividend.
    b : float
        Divisor.

    Returns
    -------
    float
        ``a / b``.

    Raises
    ------
    CantDivideByZeroError
        When ``b`` compares equal to zero.

    Examples
    --------
    >>> divide(10, 2)
    5.0
    >>> divide(10, 0)  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
        ...
    CantDivideByZeroError
    """
    divisor_is_zero = b == 0
    if not divisor_is_zero:
        return a / b
    raise CantDivideByZeroError()
@@ -0,0 +1,61 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Module for custom exceptions related to calculator operations.
5
+
6
+ This module defines custom exceptions used in calculator operations,
7
+ including a base exception class and a specific exception for division by zero errors.
8
+
9
+ Classes
10
+ -------
11
+ CalculatorError
12
+ Base class for exceptions in calculator operations.
13
+ CantDivideByZeroError
14
+ Exception raised when an attempt is made to divide by zero.
15
+
16
+ Exceptions
17
+ ----------
18
+ CalculatorError
19
+ Base class for exceptions in the calculator domain.
20
+ CantDivideByZeroError
21
+ Raised specifically for division by zero errors.
22
+ """
23
+
24
+
25
class CalculatorError(Exception):
    """
    Base class for exceptions in calculator operations.

    Intended as the common ancestor of all calculator-related exceptions so
    callers can catch the whole family with a single ``except`` clause.

    Parameters
    ----------
    *args
        Positional arguments forwarded unchanged to ``Exception``.
    """

    def __init__(self, *args):
        # Unpack the arguments: passing the tuple itself would nest it
        # (``.args == (('msg',),)``) and make ``str(exc)`` print a tuple repr
        # instead of the message.
        super().__init__(*args)


class CantDivideByZeroError(CalculatorError):
    """
    Exception raised when an attempt is made to divide by zero.

    Takes no arguments; the message is fixed.

    Notes
    -----
    The message is "tu ne peux pas diviser par zéro" (French for
    "you cannot divide by zero").
    """

    def __init__(self):
        super().__init__('tu ne peux pas diviser par zéro')
@@ -0,0 +1,2 @@
1
+ from .pipeline import impute
2
+ from .donors import impute_scaled_median, impute_regression