malcolm3utils 0.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- malcolm3utils/__init__.py +2 -0
- malcolm3utils/py.typed +0 -0
- malcolm3utils/scripts/getcol.py +100 -0
- malcolm3utils/scripts/merge.py +201 -0
- malcolm3utils/scripts/touch_latest.py +153 -0
- malcolm3utils-0.5.5.dist-info/LICENCE +20 -0
- malcolm3utils-0.5.5.dist-info/METADATA +131 -0
- malcolm3utils-0.5.5.dist-info/RECORD +10 -0
- malcolm3utils-0.5.5.dist-info/WHEEL +4 -0
- malcolm3utils-0.5.5.dist-info/entry_points.txt +5 -0
malcolm3utils/py.typed
ADDED
File without changes
malcolm3utils/scripts/getcol.py
ADDED
@@ -0,0 +1,100 @@
+#!/usr/bin/python
+
+import csv
+import sys
+from pathlib import Path
+from typing import List, Optional, Tuple
+
+import click
+
+from .. import __version__, __version_message__
+
+
+@click.command(
+    help="""
+Read the specified file and write out just the specified columns to stdout.
+
+The column_spec is a comma separated list of column headers, column indexes (one-based),
+or column ranges (e.g. 4-6 for columns 4 through 6 inclusive).
+
+If no file_to_read is specified, then input is read from stdin.
+"""
+)
+@click.option(
+    "-d", "--delimiter", type=str, help="column delimiter (default=TAB)", default="\t"
+)
+@click.option(
+    "-o",
+    "--output-delimiter",
+    type=str,
+    help="output column delimiter (default=input delimiter)",
+)
+@click.version_option(__version__, message=__version_message__)
+@click.argument("column_spec", type=str, required=True)
+@click.argument("file_to_read", type=click.Path(exists=True), required=False)
+def getcol(
+    column_spec: str,
+    file_to_read: Optional[Path] = None,
+    delimiter: str = "\t",
+    output_delimiter: Optional[str] = None,
+) -> None:
+    if output_delimiter is None:
+        output_delimiter = delimiter
+    column_list, includes_headers = _parse_column_spec(column_spec)
+    writer = csv.writer(sys.stdout, delimiter=output_delimiter)
+    try:
+        fh = sys.stdin
+        if file_to_read is not None:
+            fh = open(file_to_read)
+        reader = csv.reader(fh, delimiter=delimiter)
+
+        for irow, row in enumerate(reader):
+            if irow == 0 and includes_headers:
+                column_list = _process_headers(column_list, row)
+            output_row = [row[int(i)] for i in column_list]
+            writer.writerow(output_row)
+    finally:
+        if fh is not None:
+            fh.close()
+
+
+def _parse_column_spec(column_spec: str) -> Tuple[List[str | int], bool]:
+    column_list: List[str | int] = []
+    includes_headers = False
+    for spec in column_spec.split(","):
+        if "-" in spec:
+            range_parts = spec.split("-", 1)
+            if (
+                len(range_parts) == 2
+                and range_parts[0].isnumeric()
+                and range_parts[1].isnumeric()
+            ):
+                column_list.extend(range(int(range_parts[0]) - 1, int(range_parts[1])))
+            else:
+                column_list.append(spec)
+                includes_headers = True
+        elif spec.isnumeric():
+            column_list.append(int(spec) - 1)
+        else:
+            column_list.append(spec)
+            includes_headers = True
+    return column_list, includes_headers
+
+
+def _process_headers(
+    column_list: List[str | int], headers: List[str]
+) -> List[str | int]:
+    updated_column_list: List[str | int] = []
+    for col in column_list:
+        if isinstance(col, str):
+            try:
+                updated_column_list.append(headers.index(col))
+            except ValueError:
+                pass
+        elif isinstance(col, int):
+            updated_column_list.append(col)
+    return updated_column_list
+
+
+if __name__ == "__main__":
+    getcol()  # pragma: no cover
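For orientation, here is a minimal sketch of how a column_spec such as "name,2,4-6" is interpreted by the _parse_column_spec logic above. The spec and expected output are hypothetical; this is an editor's illustration, not part of the released file:

```python
# Hypothetical illustration of getcol's column_spec handling (not part of the
# package). "name" is a header lookup, "2" is a one-based index, and "4-6" is
# an inclusive one-based range.
spec = "name,2,4-6"

column_list = []          # mix of zero-based ints and header-name strings
includes_headers = False  # True once any header name appears in the spec
for part in spec.split(","):
    lo, _, hi = part.partition("-")
    if lo.isnumeric() and hi.isnumeric():
        # numeric range: expand to zero-based indexes, inclusive of both ends
        column_list.extend(range(int(lo) - 1, int(hi)))
    elif part.isnumeric():
        column_list.append(int(part) - 1)
    else:
        column_list.append(part)
        includes_headers = True

print(column_list)       # ['name', 1, 3, 4, 5]
print(includes_headers)  # True: the first row is used to resolve 'name'
```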
malcolm3utils/scripts/merge.py
ADDED
@@ -0,0 +1,201 @@
+#!/usr/bin/python
+
+import csv
+import logging
+import sys
+from typing import Dict, Iterable, List, Optional, TextIO
+
+import click
+import click_logging
+
+from .. import __version__, __version_message__
+
+logger = logging.getLogger(__name__)
+click_logging.basic_config(logger)
+
+
+@click.command(
+    help="""
+Merge the specified delimited files with column headings, joining entries with
+the same key field value.
+
+The files do not need to be sorted on the key field as with join(1). This does
+require that all of the data be read into memory. If that is a problem, using
+the system join(1) command is recommended.
+
+Rows will be printed in the order that the unique key values are encountered
+when reading through the input files.
+
+To read from stdin, use '-' as the filename.
+
+The output key column will be the first column of the output file and the
+header will be the header from the first file.
+
+If -k is used to specify alternative key columns for subsequent files, but
+those files have a column with the same name as the output key column, that
+will be ignored.
+"""
+)
+@click_logging.simple_verbosity_option(logger)
+@click.option(
+    "-d", "--delimiter", type=str, help="column delimiter (default=TAB)", default="\t"
+)
+@click.option(
+    "-o",
+    "--output-delimiter",
+    type=str,
+    help="output column delimiter (default=input delimiter)",
+)
+@click.option(
+    "--all-delimiter",
+    type=str,
+    help='when keep=="all" this is the delimiter used between the multiple values '
+    '(default=";")',
+    default=";",
+)
+@click.option(
+    "-k",
+    "--key-column",
+    type=str,
+    help="comma separated list of key column identifiers. "
+    "each new file will use the next identifier. "
+    "the last identifier will be used for all remaining files, "
+    'so just use "-k identifier" if the identifier is the same for all files. '
+    "The identifier can either be the header string or the one-based column index. "
+    "(default=1 (i.e. the first column of each file))",
+    default="1",
+)
+@click.option(
+    "--keep",
+    type=click.Choice(["first", "last", "uniq", "all"], case_sensitive=False),
+    default="all",
+    help="specifies how to handle multiple values for the same field with the same key",
+)
+@click.option(
+    "-I",
+    "--ignore",
+    type=str,
+    help="comma separated list of column identifiers to ignore",
+)
+@click.version_option(__version__, message=__version_message__)
+@click.argument("files_to_read", nargs=-1, type=click.File("r"), required=False)
+def merge(
+    files_to_read: Iterable[TextIO] = (),
+    key_column: str = "1",
+    delimiter: str = "\t",
+    output_delimiter: Optional[str] = None,
+    keep: str = "all",
+    all_delimiter: str = ";",
+    ignore: str | None = None,
+) -> None:
+    if output_delimiter is None:
+        output_delimiter = delimiter
+    key_column_list = key_column.split(",")
+    ignore_set = set()
+    if ignore is not None:
+        ignore_set.update(ignore.split(","))
+
+    data: Dict[str, Dict[str, str]] = {}
+    output_key = None
+    data_field_list = []
+    for ifile, fh in enumerate(files_to_read):
+        logger.debug('processing file "%s"', fh.name)
+        if ifile >= len(key_column_list):
+            ifile = -1
+        key = key_column_list[ifile]
+        reader = csv.DictReader(fh, delimiter=delimiter)
+        if reader.fieldnames is None:
+            logger.warning('No fieldnames found in file "%s", skipping file.', fh.name)
+            continue
+        this_data_field_list = [x for x in reader.fieldnames if x not in ignore_set]
+        if key.isnumeric():
+            key = this_data_field_list[int(key) - 1]
+        elif key not in this_data_field_list:
+            logger.warning(
+                'Key "%s" not found in file "%s", skipping file.', key, fh.name
+            )
+            continue
+        logger.debug('...using key "%s"', key)
+        this_data_field_list.remove(key)
+        if output_key is None:
+            output_key = key
+            data_field_list.append(output_key)
+        if output_key in this_data_field_list:
+            this_data_field_list.remove(output_key)
+        _process_rows(
+            reader,
+            fh.name,
+            key,
+            output_key,
+            keep,
+            all_delimiter,
+            this_data_field_list,
+            data,
+        )
+        data_field_list.extend(
+            [x for x in this_data_field_list if x not in data_field_list]
+        )
+
+    logger.debug("writing output")
+    writer = csv.DictWriter(
+        sys.stdout, fieldnames=data_field_list, delimiter=output_delimiter
+    )
+    writer.writeheader()
+    writer.writerows(data.values())
+
+
+def _process_rows(
+    reader: csv.DictReader,
+    fname: str,
+    key: str,
+    output_key: str,
+    keep: str,
+    all_delimiter: str,
+    data_field_list: List[str],
+    data: Dict[str, Dict[str, str]],
+) -> None:
+    irow = 0
+    for irow, row in enumerate(reader):
+        key_value = row.get(key, None)
+        if key_value is None or len(key_value) == 0:
+            logger.warning(
+                'No key value found for line %d in file "%s", skipping line.',
+                irow + 2,
+                fname,
+            )
+            continue
+        if key_value not in data:
+            data[key_value] = {output_key: key_value}
+        entry = data[key_value]
+        _process_row(row, data_field_list, keep, all_delimiter, entry)
+
+    logger.debug("...processed %d entries", irow + 1)
+    logger.debug("...total unique entries is now %d", len(data))
+
+
+def _process_row(
+    row: Dict[str, str],
+    data_field_list: List[str],
+    keep: str,
+    all_delimiter: str,
+    entry: Dict[str, str],
+) -> None:
+    for data_field in data_field_list:
+        data_value = row[data_field]
+        if data_value is None or len(data_value) == 0:
+            pass
+        elif data_field not in entry or keep == "last":
+            entry[data_field] = data_value
+        elif keep == "all":
+            entry[data_field] += all_delimiter
+            entry[data_field] += data_value
+        elif keep == "uniq":
+            if data_value not in entry[data_field].split(all_delimiter):
+                entry[data_field] += all_delimiter
+                entry[data_field] += data_value
+        else:  # keep == 'first' so ignore subsequent values
+            pass
+
+
+if __name__ == "__main__":
+    merge()  # pragma: no cover
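To make the --keep semantics above concrete, here is a small sketch of how successive values for one key/field collapse under each mode. The field name and values are made up; this is an editor's illustration, not part of the released file:

```python
# Hypothetical illustration of merge's --keep modes (not part of the package).
# Three successive values are seen for the same key and the same "color" field.
values = ["red", "blue", "red"]
all_delimiter = ";"

def collapse(values, keep):
    entry = {}
    for v in values:
        if not v:
            continue  # empty values never overwrite or append
        if "color" not in entry or keep == "last":
            entry["color"] = v
        elif keep == "all":
            entry["color"] += all_delimiter + v
        elif keep == "uniq":
            if v not in entry["color"].split(all_delimiter):
                entry["color"] += all_delimiter + v
        # keep == "first": subsequent values are ignored
    return entry["color"]

for keep in ("first", "last", "uniq", "all"):
    print(keep, "->", collapse(values, keep))
# first -> red
# last -> red            (the final value seen wins)
# uniq -> red;blue
# all -> red;blue;red
```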
malcolm3utils/scripts/touch_latest.py
ADDED
@@ -0,0 +1,153 @@
+#!/usr/bin/python
+
+import os
+from collections.abc import Iterable
+from fnmatch import fnmatch
+from pathlib import Path
+
+import click
+
+from .. import __version__, __version_message__
+
+# be sure to update docstring if you change DEFAULT_IGNORE_GLOBS
+DEFAULT_IGNORE_GLOBS = ["*~", "*.pyc", "#*", ".*", "*.OLD", "OLD"]
+
+
+@click.command()
+@click.option(
+    "-i",
+    "--ignore",
+    "ignore_patterns",
+    multiple=True,
+    type=str,
+    help="glob patterns to ignore (likely needs to be quoted, and can be repeated)",
+)
+@click.option(
+    "-f",
+    "--ignore-file",
+    "ignore_pattern_files",
+    multiple=True,
+    type=click.Path(exists=True),
+    help="file with glob patterns (one per line) to ignore (can be repeated)",
+)
+@click.option(
+    "-n", "--no-default-ignore", is_flag=True, help="do not use default ignore globs"
+)
+@click.version_option(__version__, message=__version_message__)
+@click.argument("touch_file", type=str)
+@click.argument("paths_to_check", nargs=-1, type=click.Path(exists=True), required=True)
+def touch_latest(
+    touch_file: str,
+    paths_to_check: list[str | Path],
+    ignore_patterns: Iterable[str] = (),
+    ignore_pattern_files: Iterable[str | Path] = (),
+    no_default_ignore: bool = False,
+) -> None:
+    """
+    Find the latest changed date of the files under the specified PATHS_TO_CHECK
+    and touch the TOUCH_FILE with that date (creating it if necessary).
+
+    Files that match ignore patterns will be ignored when searching
+    for the file with the latest change date.
+    Patterns that contain slashes either need to be absolute (i.e. start
+    with a slash) or they need to start with an asterisk in order
+    to match anything. So any such pattern that doesn't have either
+    will have an asterisk prepended.
+
+    Directories which match an ignore pattern will not be traversed.
+    Paths can be specified to ignore only from specific directories,
+    e.g. '*/test/*.out'.
+
+    Default ignore globs: '*~', '*.pyc', '#*', '.*', '*.OLD', 'OLD'
+
+    \b
+    touch_file: file to be touched with the latest date
+    paths_to_check: paths to search for the latest change date
+    \f
+
+    :param touch_file: file to be touched with the latest date
+    :param paths_to_check: paths to search for the latest change date
+    :param ignore_patterns: glob patterns to ignore
+    :param ignore_pattern_files: files of glob patterns to ignore
+    :param no_default_ignore: if True do not include default glob patterns
+
+    """
+    all_ignore_patterns = IgnorePatterns()
+
+    if not no_default_ignore:
+        all_ignore_patterns.add_patterns(DEFAULT_IGNORE_GLOBS)
+    for fn in ignore_pattern_files:
+        with open(fn) as fh:
+            all_ignore_patterns.add_patterns(fh)
+    all_ignore_patterns.add_patterns(ignore_patterns)
+
+    latest_timestamp = 0
+
+    for path in paths_to_check:
+        apath = os.path.abspath(path)
+        for root, dirs, files in os.walk(apath):
+            dirs[:] = [dn for dn in dirs if not all_ignore_patterns.ignore(root, dn)]
+            for fn in files:
+                if not all_ignore_patterns.ignore(root, fn):
+                    statinfo = os.stat(os.path.join(root, fn))
+                    if statinfo.st_mtime > latest_timestamp:
+                        latest_timestamp = int(statinfo.st_mtime)
+    if not os.path.exists(touch_file):
+        with open(touch_file, "w"):
+            pass
+    os.utime(touch_file, (latest_timestamp, latest_timestamp))
+
+
+class IgnorePatterns:
+    """
+    Class to handle checking glob patterns to be ignored
+    """
+
+    def __init__(self, patterns: Iterable[str] = ()) -> None:
+        self.names: list[str] = []
+        self.paths: list[str] = []
+        self.add_patterns(patterns)
+
+    def add_patterns(self, patterns: Iterable[str]) -> None:
+        """
+        Add these patterns to the list of glob patterns to be ignored.
+
+        Whitespace is stripped from the ends of each pattern since they may have been read from a file.
+
+        Patterns that contain a '/' must either start with a '/' (i.e. be absolute), or start
+        with a '*', or there is no chance of a match.
+        Accordingly, patterns that contain a '/' and start with something else have '*' prepended to them.
+
+        :param patterns: list of glob patterns to be ignored
+        :return: None
+        """
+        for pattern in patterns:
+            pattern = pattern.strip()
+            if "/" in pattern:
+                if pattern[0] not in "/*":
+                    pattern = "*" + pattern
+                self.paths.append(pattern)
+            else:
+                self.names.append(pattern)
+
+    def ignore(self, dn: str, fn: str) -> bool:
+        """
+        Check whether the file named fn in the directory named dn
+        should be ignored.
+
+        :param dn: directory name
+        :param fn: file name
+        :return: True if the path matches an ignore pattern, False otherwise
+        """
+        for ignore_name in self.names:
+            if fnmatch(fn, ignore_name):
+                return True
+        path = os.path.join(dn, fn)
+        for ignore_path in self.paths:
+            if fnmatch(path, ignore_path):
+                return True
+        return False
+
+
+if __name__ == "__main__":
+    touch_latest()  # pragma: no cover
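A quick sketch of the IgnorePatterns matching rules above: slash-less patterns are matched against bare file names, while patterns containing a slash are matched against the full path after a '*' is prepended when needed. The example patterns and paths are hypothetical; this is an editor's illustration, not part of the released file:

```python
# Hypothetical illustration of IgnorePatterns matching (not part of the package).
import os
from fnmatch import fnmatch

patterns = ["*.pyc", "test/*.out"]  # the second pattern contains a '/'

names, paths = [], []
for p in patterns:
    p = p.strip()
    if "/" in p:
        if p[0] not in "/*":
            p = "*" + p  # "test/*.out" becomes "*test/*.out"
        paths.append(p)
    else:
        names.append(p)

def ignore(dn, fn):
    # bare-name patterns first, then full-path patterns
    if any(fnmatch(fn, n) for n in names):
        return True
    full = os.path.join(dn, fn)
    return any(fnmatch(full, p) for p in paths)

print(ignore("/src/pkg", "mod.pyc"))   # True  (matches '*.pyc')
print(ignore("/src/test", "run.out"))  # True  (matches '*test/*.out')
print(ignore("/src/pkg", "run.out"))   # False
```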
malcolm3utils-0.5.5.dist-info/LICENCE
ADDED
@@ -0,0 +1,20 @@
+The MIT License (MIT)
+
+Copyright (c) 2021 Malcolm E. Davis <mnjjunk@comcast.net>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
malcolm3utils-0.5.5.dist-info/METADATA
ADDED
@@ -0,0 +1,131 @@
+Metadata-Version: 2.3
+Name: malcolm3utils
+Version: 0.5.5
+Summary: Collection of Utility Scripts and Packages
+License: BSD-3-Clause
+Author: Malcolm E. Davis
+Author-email: mnjjunk@comcast.net
+Requires-Python: >=3.9,<4.0
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: BSD License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Desktop Environment
+Classifier: Typing :: Typed
+Requires-Dist: click-logging (>=1.0.1,<2.0.0)
+Project-URL: Documentation, https://malcolm-3.github.io/malcolm3utils
+Project-URL: Homepage, https://malcolm-3.github.io/malcolm3utils
+Project-URL: Repository, https://github.com/malcolm-3/malcolm3utils
+Description-Content-Type: text/markdown
+
+# Malcolm3Utils
+
+[](https://pypi.python.org/pypi/malcolm3utils/)
+[](https://pypi.python.org/pypi/malcolm3utils/)
+[](https://pypi.python.org/pypi/malcolm3utils/)
+[](https://github.com/woltapp/wolt-python-package-cookiecutter)
+
+
+---
+
+**Documentation**: [https://malcolm-3.github.io/malcolm3utils](https://malcolm-3.github.io/malcolm3utils)
+
+**Source Code**: [https://github.com/malcolm-3/malcolm3utils](https://github.com/malcolm-3/malcolm3utils)
+
+**PyPI**: [https://pypi.org/project/malcolm3utils/](https://pypi.org/project/malcolm3utils/)
+
+---
+
+Collection of Utility Scripts and Packages
+
+## Installation
+
+```sh
+pip install malcolm3utils
+```
+
+## Usage
+
+This package provides the following command line tools:
+
+- ``touch_latest``
+  - This touches a marker file with the timestamp of the most recently changed file under the specified directories
+- ``getcol``
+  - A tool for extracting columns of data by column header name or column id
+- ``merge``
+  - A version of the ``join`` command that doesn't require pre-sorting
+
+## Development
+
+* Clone this repository
+* Requirements:
+  * [Poetry](https://python-poetry.org/)
+  * Python 3.9+
+* Create a virtual environment and install the dependencies
+
+```sh
+poetry install
+```
+
+* Activate the virtual environment
+
+```sh
+poetry shell
+```
+
+### Testing
+
+```sh
+pytest
+```
+
+### Documentation
+
+The documentation is automatically generated from the content of the `docs` directory and from the docstrings
+of the public signatures of the source code. The documentation is updated and published as a [Github project page
+](https://pages.github.com/) automatically as part of each release.
+
+### Releasing
+
+Trigger the [Draft release workflow](https://github.com/malcolm-3/malcolm3utils/actions/workflows/draft_release.yml)
+(press _Run workflow_). This will update the changelog & version and create a GitHub release which is in _Draft_ state.
+
+Find the draft release from the
+[GitHub releases](https://github.com/malcolm-3/malcolm3utils/releases) and publish it. When
+a release is published, it'll trigger the [release](https://github.com/malcolm-3/malcolm3utils/blob/master/.github/workflows/release.yml) workflow which creates a PyPI
+release and deploys updated documentation.
+
+### Pre-commit
+
+Pre-commit hooks run all the auto-formatters (e.g. `black`, `isort`), linters (e.g. `mypy`, `flake8`), and other quality
+checks to make sure the changeset is in good shape before a commit/push happens.
+
+You can install the hooks with (runs for each commit):
+
+```sh
+pre-commit install
+```
+
+Or if you want them to run only for each push:
+
+```sh
+pre-commit install -t pre-push
+```
+
+Or if you e.g. want to run all checks manually for all files:
+
+```sh
+pre-commit run --all-files
+```
+
+---
+
+This project was generated using the [python-package-cookiecutter](https://github.com/collijk/python-package-cookiecutter) template.
+
malcolm3utils-0.5.5.dist-info/RECORD
ADDED
@@ -0,0 +1,10 @@
+malcolm3utils/__init__.py,sha256=nabtuXFhto3Ig2RSu2WL4sr9-DD72QlX0S2cPaJxjCQ,90
+malcolm3utils/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+malcolm3utils/scripts/getcol.py,sha256=Q_fszFHI1NmbN8v-Va13UjctnmL63m8HFLxPHNQCK9w,3087
+malcolm3utils/scripts/merge.py,sha256=oUv26eHqByEsLWj05j85Ayn51G8BlqAAOv4M1if2S_c,6358
+malcolm3utils/scripts/touch_latest.py,sha256=Wb9YJKcrsp_mGuJkYYRb0tjKsRZwiay2BGhaZLXZZCc,5273
+malcolm3utils-0.5.5.dist-info/LICENCE,sha256=LGjd6BjR_IDgNeEnz7XOVD2CQNpiGcwxZidsFvP0KiM,1105
+malcolm3utils-0.5.5.dist-info/METADATA,sha256=yTzsH1ks7mUeGXZlYcAa3zFGSbeMTuEKE7COyGt2K0c,4395
+malcolm3utils-0.5.5.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+malcolm3utils-0.5.5.dist-info/entry_points.txt,sha256=FSdDidQCKW8lbuRjSZOO5ef76lcCNk5DhJWMtxuXXq4,163
+malcolm3utils-0.5.5.dist-info/RECORD,,