filterframes 0.1.3__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. filterframes-0.2.0/.github/dependabot.yml +11 -0
  2. filterframes-0.2.0/.github/workflows/python-package.yml +44 -0
  3. filterframes-0.2.0/.github/workflows/python-publish.yml +28 -0
  4. filterframes-0.2.0/.gitignore +20 -0
  5. filterframes-0.2.0/CHANGELOG.md +26 -0
  6. {filterframes-0.1.3/src/filterframes.egg-info → filterframes-0.2.0}/PKG-INFO +18 -4
  7. filterframes-0.2.0/justfile +39 -0
  8. filterframes-0.2.0/pyproject.toml +68 -0
  9. filterframes-0.2.0/src/filterframes/__init__.py +15 -0
  10. {filterframes-0.1.3 → filterframes-0.2.0}/src/filterframes/filterframes.py +99 -76
  11. filterframes-0.2.0/src/filterframes/py.typed +0 -0
  12. {filterframes-0.1.3 → filterframes-0.2.0/src/filterframes.egg-info}/PKG-INFO +18 -4
  13. {filterframes-0.1.3 → filterframes-0.2.0}/src/filterframes.egg-info/SOURCES.txt +11 -2
  14. filterframes-0.2.0/src/filterframes.egg-info/requires.txt +7 -0
  15. filterframes-0.2.0/tests/data/DTASelect-filter_V2_1_12_paser.txt +176 -0
  16. filterframes-0.2.0/tests/data/DTASelect-filter_V2_1_13.txt +161 -0
  17. filterframes-0.2.0/tests/test_filterframes.py +128 -0
  18. filterframes-0.2.0/uv.lock +927 -0
  19. filterframes-0.1.3/pyproject.toml +0 -36
  20. filterframes-0.1.3/setup.py +0 -5
  21. filterframes-0.1.3/src/filterframes/__init__.py +0 -13
  22. filterframes-0.1.3/src/filterframes.egg-info/requires.txt +0 -1
  23. filterframes-0.1.3/tests/test_filterframes.py +0 -47
  24. {filterframes-0.1.3 → filterframes-0.2.0}/LICENSE +0 -0
  25. {filterframes-0.1.3 → filterframes-0.2.0}/README.md +0 -0
  26. {filterframes-0.1.3 → filterframes-0.2.0}/setup.cfg +0 -0
  27. {filterframes-0.1.3 → filterframes-0.2.0}/src/filterframes.egg-info/dependency_links.txt +0 -0
  28. {filterframes-0.1.3 → filterframes-0.2.0}/src/filterframes.egg-info/top_level.txt +0 -0
@@ -0,0 +1,11 @@
1
+ # To get started with Dependabot version updates, you'll need to specify which
2
+ # package ecosystems to update and where the package manifests are located.
3
+ # Please see the documentation for all configuration options:
4
+ # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
5
+
6
+ version: 2
7
+ updates:
8
+ - package-ecosystem: "pip"
9
+ directory: "/"
10
+ schedule:
11
+ interval: "daily"
@@ -0,0 +1,44 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [ "main" ]
6
+ pull_request:
7
+ branches: [ "main" ]
8
+
9
+ jobs:
10
+ lint:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+ - name: Install uv
15
+ uses: astral-sh/setup-uv@v6
16
+ - uses: actions/setup-python@v5
17
+ with:
18
+ python-version: "3.13"
19
+ - name: Install dependencies
20
+ run: uv sync --extra dev
21
+ - name: Ruff lint
22
+ run: uvx ruff check src/ tests/
23
+ - name: Ruff format
24
+ run: uvx ruff format --check src/ tests/
25
+ - name: Type check with ty
26
+ run: uvx ty check src/
27
+
28
+ test:
29
+ runs-on: ubuntu-latest
30
+ strategy:
31
+ fail-fast: false
32
+ matrix:
33
+ python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
34
+ steps:
35
+ - uses: actions/checkout@v4
36
+ - name: Install uv
37
+ uses: astral-sh/setup-uv@v6
38
+ - uses: actions/setup-python@v5
39
+ with:
40
+ python-version: ${{ matrix.python-version }}
41
+ - name: Install dependencies
42
+ run: uv sync --extra dev
43
+ - name: Test with pytest
44
+ run: uv run pytest --cov=filterframes --cov-report=term-missing
@@ -0,0 +1,28 @@
1
+ name: Upload Python Package
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ permissions:
8
+ contents: read
9
+
10
+ jobs:
11
+ deploy:
12
+
13
+ runs-on: ubuntu-latest
14
+
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+ - name: Install uv
18
+ uses: astral-sh/setup-uv@v6
19
+ - uses: actions/setup-python@v5
20
+ with:
21
+ python-version: '3.13'
22
+ - name: Build package
23
+ run: uv build
24
+ - name: Publish package
25
+ uses: pypa/gh-action-pypi-publish@release/v1
26
+ with:
27
+ user: __token__
28
+ password: ${{ secrets.PYPI_API_TOKEN }}
@@ -0,0 +1,20 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Distribution / packaging
7
+ *.egg-info/
8
+ *.egg
9
+ dist/
10
+ build/
11
+
12
+ # Testing / coverage
13
+ .coverage
14
+ .pytest_cache/
15
+ htmlcov/
16
+
17
+ # IDE
18
+ .idea/
19
+ .vscode/
20
+ *.swp
@@ -0,0 +1,26 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ ## [0.2.0]
6
+
7
+ ### Changed
8
+ - Minimum Python version raised from 3.8 to 3.9
9
+ - Added Python 3.12 and 3.13 to CI test matrix
10
+ - Updated GitHub Actions to v4/v5 for checkout and setup-python
11
+ - Improved exception chaining (`raise ... from exc`) for better tracebacks
12
+ - Renamed type alias from `FILE_TYPES` to `FileTypes` (PEP 8 compliance)
13
+ - Added `py.typed` marker for PEP 561 typed package support
14
+ - Added logging throughout the parsing pipeline
15
+ - Added input validation for malformed DTASelect-filter files
16
+ - Added `pytest-cov` for test coverage reporting in CI
17
+ - Expanded test suite with input type, error handling, and data integrity tests
18
+ - Modernized `pyproject.toml` with optional `[dev]` dependencies and tool configs
19
+ - Removed legacy `setup.py` (not needed with modern pip)
20
+ - Fixed potential crash when `end_lines` is empty
21
+ - Pinned minimum pandas version to `>=1.5`
22
+
23
+ ## [0.1.3]
24
+
25
+ ### Changed
26
+ - `_get_lines` now works with Streamlit uploaded files and any other IO-type object
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: filterframes
3
- Version: 0.1.3
3
+ Version: 0.2.0
4
4
  Summary: A very simple DTASelect-Filter.txt parser.
5
5
  Author-email: Patrick Garrett <pgarrett@scripps.edu>
6
6
  License: MIT License
@@ -25,15 +25,29 @@ License: MIT License
25
25
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
26
  SOFTWARE.
27
27
 
28
- Project-URL: repository, https://github.com/pgarrett-scripps/FilterFrames.git
28
+ Project-URL: Repository, https://github.com/pgarrett-scripps/FilterFrames
29
+ Project-URL: Changelog, https://github.com/pgarrett-scripps/FilterFrames/blob/main/CHANGELOG.md
29
30
  Keywords: IP2,PASER,Parser,Streamlit,DTASelect-filter,Peptide,Protein,Proteomics
30
31
  Classifier: Programming Language :: Python :: 3
32
+ Classifier: Programming Language :: Python :: 3.9
33
+ Classifier: Programming Language :: Python :: 3.10
34
+ Classifier: Programming Language :: Python :: 3.11
35
+ Classifier: Programming Language :: Python :: 3.12
36
+ Classifier: Programming Language :: Python :: 3.13
31
37
  Classifier: Development Status :: 4 - Beta
32
38
  Classifier: License :: OSI Approved :: MIT License
33
39
  Classifier: Operating System :: OS Independent
34
- Requires-Python: >=3.8
40
+ Classifier: Typing :: Typed
41
+ Requires-Python: >=3.9
35
42
  Description-Content-Type: text/markdown
36
43
  License-File: LICENSE
44
+ Requires-Dist: pandas>=1.5
45
+ Provides-Extra: dev
46
+ Requires-Dist: pytest>=7; extra == "dev"
47
+ Requires-Dist: pytest-cov>=4; extra == "dev"
48
+ Requires-Dist: ruff>=0.4; extra == "dev"
49
+ Requires-Dist: ty; extra == "dev"
50
+ Dynamic: license-file
37
51
 
38
52
  ![example workflow](https://github.com/pgarrett-scripps/FilterFrames/actions/workflows/python-package.yml/badge.svg)
39
53
  ![example workflow](https://github.com/pgarrett-scripps/FilterFrames/actions/workflows/pylint.yml/badge.svg)
@@ -0,0 +1,39 @@
1
+ # List available recipes
2
+ default:
3
+ @just --list
4
+
5
+ # Install the package with dev dependencies
6
+ install:
7
+ uv sync --extra dev
8
+
9
+ # Run all checks (lint, format, typecheck, test)
10
+ check: lint format typecheck test
11
+
12
+ # Run ruff linter
13
+ lint:
14
+ uvx ruff check src/ tests/
15
+
16
+ # Check code formatting
17
+ format:
18
+ uvx ruff format --check src/ tests/
19
+
20
+ # Auto-fix lint issues and format code
21
+ fix:
22
+ uvx ruff check --fix src/ tests/
23
+ uvx ruff format src/ tests/
24
+
25
+ # Run ty type checker
26
+ typecheck:
27
+ uvx ty check src/
28
+
29
+ # Run tests
30
+ test *args:
31
+ uv run pytest {{ args }}
32
+
33
+ # Run tests with coverage
34
+ test-cov:
35
+ uv run pytest --cov=filterframes --cov-report=term-missing
36
+
37
+ # Build the package
38
+ build:
39
+ uv build
@@ -0,0 +1,68 @@
1
+ [build-system]
2
+ requires = ["setuptools>=64", "setuptools-scm"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "filterframes"
7
+ authors = [
8
+ {name = "Patrick Garrett", email = "pgarrett@scripps.edu"},
9
+ ]
10
+ description = "A very simple DTASelect-Filter.txt parser."
11
+ readme = "README.md"
12
+ requires-python = ">=3.9"
13
+ dynamic = ["version"]
14
+ license = {file = "LICENSE"}
15
+ classifiers = [
16
+ "Programming Language :: Python :: 3",
17
+ "Programming Language :: Python :: 3.9",
18
+ "Programming Language :: Python :: 3.10",
19
+ "Programming Language :: Python :: 3.11",
20
+ "Programming Language :: Python :: 3.12",
21
+ "Programming Language :: Python :: 3.13",
22
+ "Development Status :: 4 - Beta",
23
+ "License :: OSI Approved :: MIT License",
24
+ "Operating System :: OS Independent",
25
+ "Typing :: Typed",
26
+ ]
27
+ dependencies = [
28
+ "pandas>=1.5",
29
+ ]
30
+ keywords = ["IP2", "PASER", "Parser", "Streamlit", "DTASelect-filter", "Peptide", "Protein", "Proteomics"]
31
+
32
+ [project.optional-dependencies]
33
+ dev = [
34
+ "pytest>=7",
35
+ "pytest-cov>=4",
36
+ "ruff>=0.4",
37
+ "ty",
38
+ ]
39
+
40
+ [project.urls]
41
+ Repository = "https://github.com/pgarrett-scripps/FilterFrames"
42
+ Changelog = "https://github.com/pgarrett-scripps/FilterFrames/blob/main/CHANGELOG.md"
43
+
44
+ [tool.setuptools]
45
+ package-dir = {"" = "src"}
46
+
47
+ [tool.setuptools.dynamic]
48
+ version = {attr = "filterframes.__version__"}
49
+
50
+ [tool.pytest.ini_options]
51
+ testpaths = ["tests"]
52
+
53
+ [tool.ruff]
54
+ target-version = "py39"
55
+ line-length = 120
56
+ src = ["src"]
57
+
58
+ [tool.ruff.lint]
59
+ select = [
60
+ "E", # pycodestyle errors
61
+ "W", # pycodestyle warnings
62
+ "F", # pyflakes
63
+ "I", # isort
64
+ "UP", # pyupgrade
65
+ "B", # flake8-bugbear
66
+ "SIM", # flake8-simplify
67
+ "RUF", # ruff-specific rules
68
+ ]
@@ -0,0 +1,15 @@
1
+ """FilterFrames: A DTASelect-filter.txt parser built on pandas."""
2
+
3
+ from .filterframes import (
4
+ FileTypes,
5
+ from_dta_select_filter,
6
+ to_dta_select_filter,
7
+ )
8
+
9
+ __all__ = [
10
+ "FileTypes",
11
+ "from_dta_select_filter",
12
+ "to_dta_select_filter",
13
+ ]
14
+
15
+ __version__ = "0.2.0"
@@ -1,15 +1,23 @@
1
- """Module providing function for converting between DTASelectFilter.tx files and pandas DataFrame objects"""
1
+ """Module providing functions for converting between DTASelect-filter.txt files and pandas DataFrame objects."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
2
6
  import os
7
+ from collections.abc import Generator
3
8
  from enum import Enum
4
- from io import TextIOWrapper, StringIO
5
- from typing import List, Union, Any, TextIO, Generator
9
+ from io import StringIO, TextIOWrapper
10
+ from typing import Any, TextIO, Union
6
11
 
7
12
  import pandas as pd
8
13
 
9
- FILE_TYPES = Union[str, TextIOWrapper, StringIO, TextIO]
14
+ logger = logging.getLogger(__name__)
15
+
16
+ # Use typing.Union: this alias is evaluated at runtime, where `X | Y` on classes needs Python 3.10+
17
+ FileTypes = Union[str, TextIOWrapper, StringIO, TextIO]
10
18
 
11
19
 
12
- def _get_lines(file_input: FILE_TYPES) -> Generator[str, None, None]:
20
+ def _get_lines(file_input: FileTypes) -> Generator[str, None, None]:
13
21
  """
14
22
  Retrieve lines from a file or string input.
15
23
 
@@ -25,31 +33,30 @@ def _get_lines(file_input: FILE_TYPES) -> Generator[str, None, None]:
25
33
  Raises:
26
34
  ValueError: If the input type is not supported.
27
35
  """
28
- if isinstance(file_input, str): # File path or string
36
+ if isinstance(file_input, str): # File path or string
29
37
  if os.path.exists(file_input):
30
- with open(file=file_input, mode='r', encoding='UTF-8') as file:
38
+ logger.debug("Reading from file path: %s", file_input)
39
+ with open(file=file_input, encoding="UTF-8") as file:
31
40
  for line in file:
32
- yield line.rstrip('\n')
41
+ yield line.rstrip("\n")
33
42
  else:
34
- for line in file_input.split('\n'):
35
- yield line.rstrip('\n')
36
- elif isinstance(file_input, (TextIOWrapper, TextIO)): # TextIOWrapper or StringIO
43
+ logger.debug("Reading from raw string input")
44
+ for line in file_input.split("\n"):
45
+ yield line.rstrip("\n")
46
+ elif isinstance(file_input, (TextIOWrapper, StringIO)):
47
+ logger.debug("Reading from %s object", type(file_input).__name__)
37
48
  file_input.seek(0)
38
49
  for line in file_input:
39
- yield line.rstrip('\n')
40
- elif isinstance(file_input, StringIO): # StringIO
41
- file_input.seek(0)
42
- for line in file_input.readlines():
43
- yield line.rstrip('\n')
50
+ yield line.rstrip("\n")
44
51
  else:
45
52
  try:
46
53
  for line in file_input:
47
- yield line.decode('UTF-8').rstrip('\n')
48
- except Exception as e:
49
- raise ValueError(f'Unsupported input type: {type(file_input)}!')
54
+ yield line.decode("UTF-8").rstrip("\n") # ty: ignore[unresolved-attribute]
55
+ except (AttributeError, TypeError, UnicodeDecodeError) as exc:
56
+ raise ValueError(f"Unsupported input type: {type(file_input)}") from exc
50
57
 
51
58
 
52
- def _convert_to_best_datatype(values: List[Any]):
59
+ def _convert_to_best_datatype(values: list[Any]) -> list[Any]:
53
60
  """
54
61
  Convert a list of values to the most suitable datatype.
55
62
 
@@ -64,11 +71,9 @@ def _convert_to_best_datatype(values: List[Any]):
64
71
  Raises:
65
72
  ValueError: If unable to convert values to any datatype.
66
73
  """
67
-
68
74
  for datatype in [float, int, str]:
69
75
  try:
70
- converted_values = [datatype(value) for value in values]
71
- return converted_values
76
+ return [datatype(value) for value in values]
72
77
  except (ValueError, TypeError):
73
78
  continue
74
79
  raise ValueError("Unable to convert values to any datatype")
@@ -104,12 +109,12 @@ def _reorder_columns(dataframe: pd.DataFrame, column: str, new_position: int) ->
104
109
  pd.DataFrame: A dataframe with reordered columns.
105
110
  """
106
111
 
107
- columns = dataframe.columns.tolist()
112
+ columns: list[str] = dataframe.columns.tolist()
108
113
  columns.insert(new_position, columns.pop(columns.index(column)))
109
- return dataframe[columns]
114
+ return dataframe.reindex(columns=columns)
110
115
 
111
116
 
112
- def _write_lines(file_output, lines):
117
+ def _write_lines(file_output: TextIOWrapper | StringIO, lines: list[str]) -> None:
113
118
  """
114
119
  Write a list of lines to a given file output.
115
120
 
@@ -119,11 +124,12 @@ def _write_lines(file_output, lines):
119
124
  """
120
125
 
121
126
  for line in lines:
122
- file_output.write(line + '\n')
127
+ file_output.write(line + "\n")
123
128
 
124
129
 
125
- def from_dta_select_filter(file_input: Union[str, TextIOWrapper, StringIO, TextIO]) -> (
126
- List[str], pd.DataFrame, pd.DataFrame, List[str]):
130
+ def from_dta_select_filter(
131
+ file_input: str | TextIOWrapper | StringIO | TextIO,
132
+ ) -> tuple[list[str], pd.DataFrame, pd.DataFrame, list[str]]:
127
133
  """
128
134
  Process the given file and extract relevant information to create peptide and protein dataframes.
129
135
 
@@ -143,47 +149,49 @@ def from_dta_select_filter(file_input: Union[str, TextIOWrapper, StringIO, TextI
143
149
 
144
150
  lines = _get_lines(file_input)
145
151
 
146
- class FileState(Enum):
147
- """
148
- Enum for specifying the different parts of the DTASelect-filter.txt file
149
- """
152
+ class _FileState(Enum):
150
153
  HEADER = 1
151
154
  DATA = 2
152
155
  INFO = 3
153
156
 
154
- file_state = FileState.HEADER
157
+ file_state = _FileState.HEADER
155
158
 
156
- header_lines, end_lines = [], []
157
- peptide_data, protein_data = None, None
158
- current_protein_grp, peptide_line_cnt = 0, 0
159
+ header_lines: list[str] = []
160
+ end_lines: list[str] = []
161
+ peptide_data: dict[str, list[Any]] | None = None
162
+ protein_data: dict[str, list[Any]] | None = None
163
+ current_protein_grp = 0
164
+ peptide_line_cnt = 0
159
165
 
160
- for i, line in enumerate(lines):
166
+ for line in lines:
161
167
  line_elements = line.rstrip().split("\t")
162
168
 
163
- if line.startswith('Locus'): # Protein Line Header
169
+ if line.startswith("Locus"): # Protein Line Header
164
170
  protein_data = {key: [] for key in line_elements}
165
- protein_data['ProteinGroup'] = []
171
+ protein_data["ProteinGroup"] = []
166
172
 
167
- if line.startswith('Unique'): # Peptide Line Header
173
+ if line.startswith("Unique"): # Peptide Line Header
168
174
  peptide_data = {key: [] for key in line_elements}
169
- peptide_data['ProteinGroup'] = []
175
+ peptide_data["ProteinGroup"] = []
170
176
 
171
177
  header_lines.append(line)
172
- file_state = FileState.DATA
178
+ file_state = _FileState.DATA
173
179
  continue
174
180
 
175
181
  if len(line_elements) > 1 and line_elements[1] == "Proteins":
176
- file_state = FileState.INFO
182
+ file_state = _FileState.INFO
177
183
 
178
- if file_state == FileState.HEADER:
184
+ if file_state == _FileState.HEADER:
179
185
  header_lines.append(line)
180
186
 
181
- if file_state == FileState.DATA:
182
- if line_elements[0] == '' or '*' in line_elements[0] or line_elements[0].isnumeric():
187
+ if file_state == _FileState.DATA:
188
+ if peptide_data is None or protein_data is None:
189
+ continue
183
190
 
191
+ if line_elements[0] == "" or "*" in line_elements[0] or line_elements[0].isnumeric():
184
192
  for key, value in zip(peptide_data, line_elements):
185
193
  peptide_data[key].append(value)
186
- peptide_data['ProteinGroup'].append(current_protein_grp)
194
+ peptide_data["ProteinGroup"].append(current_protein_grp)
187
195
 
188
196
  peptide_line_cnt += 1
189
197
  else:
@@ -193,11 +201,20 @@ def from_dta_select_filter(file_input: Union[str, TextIOWrapper, StringIO, TextI
193
201
 
194
202
  for key, value in zip(protein_data, line_elements):
195
203
  protein_data[key].append(value)
196
- protein_data['ProteinGroup'].append(current_protein_grp)
204
+ protein_data["ProteinGroup"].append(current_protein_grp)
197
205
 
198
- if file_state == FileState.INFO:
206
+ if file_state == _FileState.INFO:
199
207
  end_lines.append(line)
200
208
 
209
+ if peptide_data is None or protein_data is None:
210
+ raise ValueError("Input does not appear to be a valid DTASelect-filter file: missing header columns")
211
+
212
+ logger.debug(
213
+ "Parsed %d peptide columns, %d protein columns",
214
+ len(peptide_data),
215
+ len(protein_data),
216
+ )
217
+
201
218
  for k in peptide_data:
202
219
  peptide_data[k] = _convert_to_best_datatype(peptide_data[k])
203
220
 
@@ -207,27 +224,33 @@ def from_dta_select_filter(file_input: Union[str, TextIOWrapper, StringIO, TextI
207
224
  peptide_df = pd.DataFrame(peptide_data)
208
225
  protein_df = pd.DataFrame(protein_data)
209
226
 
210
- file_name_components = [fn.split('.') for fn in peptide_df['FileName']]
211
- peptide_df.drop(['FileName'], axis=1, inplace=True)
227
+ file_name_components = [fn.split(".") for fn in peptide_df["FileName"]]
228
+ peptide_df.drop(["FileName"], axis=1, inplace=True)
212
229
 
213
- peptide_df['FileName'] = _convert_to_best_datatype([comp[0] for comp in file_name_components])
214
- peptide_df['FileName'] = peptide_df['FileName'].astype('category')
230
+ peptide_df["FileName"] = _convert_to_best_datatype([comp[0] for comp in file_name_components])
231
+ peptide_df["FileName"] = peptide_df["FileName"].astype("category")
215
232
 
216
- peptide_df['LowScan'] = _convert_to_best_datatype([comp[1] for comp in file_name_components])
217
- peptide_df['HighScan'] = _convert_to_best_datatype([comp[2] for comp in file_name_components])
218
- peptide_df['Charge'] = _convert_to_best_datatype([comp[3] for comp in file_name_components])
233
+ peptide_df["LowScan"] = _convert_to_best_datatype([comp[1] for comp in file_name_components])
234
+ peptide_df["HighScan"] = _convert_to_best_datatype([comp[2] for comp in file_name_components])
235
+ peptide_df["Charge"] = _convert_to_best_datatype([comp[3] for comp in file_name_components])
219
236
 
220
237
  peptide_df = peptide_df.convert_dtypes()
221
238
  protein_df = protein_df.convert_dtypes()
222
239
 
223
- if end_lines[-1] == '':
240
+ if end_lines and end_lines[-1] == "":
224
241
  end_lines = end_lines[:-1]
225
242
 
243
+ logger.info("Parsed %d proteins and %d peptides", len(protein_df), len(peptide_df))
244
+
226
245
  return header_lines, peptide_df, protein_df, end_lines
227
246
 
228
247
 
229
- def to_dta_select_filter(header_lines: List[str], peptide_df: pd.DataFrame, protein_df: pd.DataFrame,
230
- end_lines: List[str]) -> StringIO:
248
+ def to_dta_select_filter(
249
+ header_lines: list[str],
250
+ peptide_df: pd.DataFrame,
251
+ protein_df: pd.DataFrame,
252
+ end_lines: list[str],
253
+ ) -> StringIO:
231
254
  """
232
255
  Convert the given header lines, peptide and protein dataframes, and end lines into a StringIO object.
233
256
 
@@ -254,40 +277,40 @@ def to_dta_select_filter(header_lines: List[str], peptide_df: pd.DataFrame, prot
254
277
 
255
278
  # Write protein and peptide data
256
279
  concatenated_file_names = peptide_df.apply(_create_file_name, axis=1)
257
- peptide_df.drop(['FileName', 'LowScan', 'HighScan', 'Charge'], axis=1, inplace=True)
258
- peptide_df['FileName'] = concatenated_file_names
280
+ peptide_df.drop(["FileName", "LowScan", "HighScan", "Charge"], axis=1, inplace=True)
281
+ peptide_df["FileName"] = concatenated_file_names
259
282
  # Re-order columns to make FileName the second column
260
- peptide_df = _reorder_columns(peptide_df, 'FileName', 1)
283
+ peptide_df = _reorder_columns(peptide_df, "FileName", 1)
261
284
 
262
- protein_data_str = protein_df.drop(['ProteinGroup'], axis=1).to_csv(header=False, index=False, sep='\t')
263
- peptide_data_str = peptide_df.drop(['ProteinGroup'], axis=1).to_csv(header=False, index=False, sep='\t')
285
+ protein_data_str = protein_df.drop(["ProteinGroup"], axis=1).to_csv(header=False, index=False, sep="\t")
286
+ peptide_data_str = peptide_df.drop(["ProteinGroup"], axis=1).to_csv(header=False, index=False, sep="\t")
264
287
 
265
- protein_data_str = protein_data_str.replace('\r', '')
266
- peptide_data_str = peptide_data_str.replace('\r', '')
288
+ protein_data_str = protein_data_str.replace("\r", "")
289
+ peptide_data_str = peptide_data_str.replace("\r", "")
267
290
 
268
291
  current_protein_grp = 0
269
- protein_lines = protein_data_str.split('\n')
270
- peptide_lines = peptide_data_str.split('\n')
292
+ protein_lines = protein_data_str.split("\n")
293
+ peptide_lines = peptide_data_str.split("\n")
271
294
 
272
- if protein_lines[-1] == '':
295
+ if protein_lines[-1] == "":
273
296
  protein_lines = protein_lines[:-1]
274
297
 
275
- if peptide_lines[-1] == '':
298
+ if peptide_lines[-1] == "":
276
299
  peptide_lines = peptide_lines[:-1]
277
300
 
278
301
  protein_line_idx = 0
279
302
  peptide_line_idx = 0
280
303
 
281
304
  while protein_line_idx < len(protein_lines) and peptide_line_idx < len(peptide_lines):
282
- if int(protein_df.iloc[protein_line_idx]['ProteinGroup']) == current_protein_grp:
283
- file_output.write(protein_lines[protein_line_idx] + '\n')
305
+ if int(protein_df.iloc[protein_line_idx]["ProteinGroup"]) == current_protein_grp:
306
+ file_output.write(protein_lines[protein_line_idx] + "\n")
284
307
  protein_line_idx += 1
285
308
  else:
286
- file_output.write(peptide_lines[peptide_line_idx] + '\n')
309
+ file_output.write(peptide_lines[peptide_line_idx] + "\n")
287
310
  peptide_line_idx += 1
288
311
  if peptide_line_idx < len(peptide_lines) and int(
289
- peptide_df.iloc[peptide_line_idx - 1]['ProteinGroup']) != int(
290
- peptide_df.iloc[peptide_line_idx]['ProteinGroup']):
312
+ peptide_df.iloc[peptide_line_idx - 1]["ProteinGroup"]
313
+ ) != int(peptide_df.iloc[peptide_line_idx]["ProteinGroup"]):
291
314
  current_protein_grp += 1
292
315
 
293
316
  # Write remaining protein and peptide lines
File without changes
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: filterframes
3
- Version: 0.1.3
3
+ Version: 0.2.0
4
4
  Summary: A very simple DTASelect-Filter.txt parser.
5
5
  Author-email: Patrick Garrett <pgarrett@scripps.edu>
6
6
  License: MIT License
@@ -25,15 +25,29 @@ License: MIT License
25
25
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
26
  SOFTWARE.
27
27
 
28
- Project-URL: repository, https://github.com/pgarrett-scripps/FilterFrames.git
28
+ Project-URL: Repository, https://github.com/pgarrett-scripps/FilterFrames
29
+ Project-URL: Changelog, https://github.com/pgarrett-scripps/FilterFrames/blob/main/CHANGELOG.md
29
30
  Keywords: IP2,PASER,Parser,Streamlit,DTASelect-filter,Peptide,Protein,Proteomics
30
31
  Classifier: Programming Language :: Python :: 3
32
+ Classifier: Programming Language :: Python :: 3.9
33
+ Classifier: Programming Language :: Python :: 3.10
34
+ Classifier: Programming Language :: Python :: 3.11
35
+ Classifier: Programming Language :: Python :: 3.12
36
+ Classifier: Programming Language :: Python :: 3.13
31
37
  Classifier: Development Status :: 4 - Beta
32
38
  Classifier: License :: OSI Approved :: MIT License
33
39
  Classifier: Operating System :: OS Independent
34
- Requires-Python: >=3.8
40
+ Classifier: Typing :: Typed
41
+ Requires-Python: >=3.9
35
42
  Description-Content-Type: text/markdown
36
43
  License-File: LICENSE
44
+ Requires-Dist: pandas>=1.5
45
+ Provides-Extra: dev
46
+ Requires-Dist: pytest>=7; extra == "dev"
47
+ Requires-Dist: pytest-cov>=4; extra == "dev"
48
+ Requires-Dist: ruff>=0.4; extra == "dev"
49
+ Requires-Dist: ty; extra == "dev"
50
+ Dynamic: license-file
37
51
 
38
52
  ![example workflow](https://github.com/pgarrett-scripps/FilterFrames/actions/workflows/python-package.yml/badge.svg)
39
53
  ![example workflow](https://github.com/pgarrett-scripps/FilterFrames/actions/workflows/pylint.yml/badge.svg)