PyPI - malcolm3utils - Versions diffs - 0.5.6__tar.gz → 0.6.0__tar.gz - Mend

malcolm3utils 0.5.6tar.gz → 0.6.0tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

{malcolm3utils-0.5.6 → malcolm3utils-0.6.0}/PKG-INFO RENAMED Viewed

@@ -1,25 +1,31 @@
-Metadata-Version: 2.3
+Metadata-Version: 2.4
 Name: malcolm3utils
-Version: 0.5.6
+Version: 0.6.0
 Summary: Collection of Utility Scripts and Packages
 License: BSD-3-Clause
+License-File: LICENCE
 Author: Malcolm E. Davis
 Author-email: mnjjunk@comcast.net
-Requires-Python: >=3.9,<4.0
+Requires-Python: >=3.11,<4.0
 Classifier: Development Status :: 4 - Beta
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: BSD License
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
+Classifier: Programming Language :: Python :: 3.10
 Classifier: Topic :: Desktop Environment
 Classifier: Typing :: Typed
+Requires-Dist: click (>=8.3.1,<9.0.0)
 Requires-Dist: click-logging (>=1.0.1,<2.0.0)
+Requires-Dist: lark (>=1.3.1,<2.0.0)
+Requires-Dist: pandas (>=3.0.0,<4.0.0)
+Requires-Dist: requests (>=2.32.4)
+Requires-Dist: urllib3 (>=2.6.3)
 Project-URL: Documentation, https://malcolm-3.github.io/malcolm3utils
 Project-URL: Homepage, https://malcolm-3.github.io/malcolm3utils
 Project-URL: Repository, https://github.com/malcolm-3/malcolm3utils
@@ -61,6 +67,8 @@ This package provides the following command line tools
   - A tool for extracting columns of data by column header name or column id
 - ``merge``
   - A version of the ``join`` command that doesn't require pre-sorting
+- ``filter``
+  - Filter csv files using expressions containing the column headers
 ## Development

{malcolm3utils-0.5.6 → malcolm3utils-0.6.0}/README.md RENAMED Viewed

@@ -34,6 +34,8 @@ This package provides the following command line tools
   - A tool for extracting columns of data by column header name or column id
 - ``merge``
   - A version of the ``join`` command that doesn't require pre-sorting
+- ``filter``
+  - Filter csv files using expressions containing the column headers
 ## Development

{malcolm3utils-0.5.6 → malcolm3utils-0.6.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "malcolm3utils"
-version = "0.5.6"
+version = "0.6.0"
 description = "Collection of Utility Scripts and Packages"
 authors = [
     "Malcolm E. Davis <mnjjunk@comcast.net>",
@@ -30,8 +30,13 @@ packages = [
 ]
 [tool.poetry.dependencies]
-python = ">=3.9, <4.0"
+python = ">=3.11, <4.0"
 click-logging = "^1.0.1"
+requests = ">=2.32.4"
+urllib3 = ">=2.6.3"
+pandas = "^3.0.0"
+click = "^8.3.1"
+lark = "^1.3.1"
 [tool.poetry.group.dev.dependencies]
 autoflake = "*"
@@ -58,6 +63,7 @@ python-kacl = "*"
 pyupgrade = "*"
 tryceratops = "*"
 setuptools = "*"
+pandas-stubs = "^3.0.0.260204"
 [build-system]
 requires = ["poetry-core>=1.0.0"]
@@ -68,7 +74,7 @@ profile = "black"
 src_paths = ["src", "tests"]
 [tool.black]
-target-version = ["py39"]
+target-version = ["py310"]
 include = '\.pyi?$'
 [tool.pytest.ini_options]
@@ -77,7 +83,6 @@ pythonpath = [
 ]
 addopts = """\
     --cov malcolm3utils \
-    --cov tests \
     --cov-report term-missing \
     --no-cov-on-fail \
 """
@@ -99,6 +104,10 @@ warn_redundant_casts = true
 warn_return_any = true
 check_untyped_defs = true
 show_error_codes = true
+exclude = [
+  "tests",
+  "venv",
+]
 [[tool.mypy.overrides]]
 module = "click_logging"
@@ -112,3 +121,4 @@ replace = '__version__ = "{new_version}"'
 touch_latest = 'malcolm3utils.scripts.touch_latest:touch_latest'
 getcol = 'malcolm3utils.scripts.getcol:getcol'
 merge = 'malcolm3utils.scripts.merge:merge'
+filter = 'malcolm3utils.scripts.filter:cli'

{malcolm3utils-0.5.6 → malcolm3utils-0.6.0}/src/malcolm3utils/__init__.py RENAMED Viewed

@@ -1,2 +1,2 @@
-__version__ = "0.5.6"
+__version__ = "0.6.0"
 __version_message__ = "%(prog)s, malcolm3utils version %(version)s"

malcolm3utils-0.6.0/src/malcolm3utils/scripts/__init__.py ADDED Viewed

File without changes

malcolm3utils-0.6.0/src/malcolm3utils/scripts/filter.py ADDED Viewed

@@ -0,0 +1,139 @@
+import logging
+import sys
+from csv import DictReader, DictWriter
+from io import TextIOWrapper
+from typing import Any, TextIO, Tuple
+import click
+import click_logging
+from malcolm3utils import __version__, __version_message__
+from malcolm3utils.utils.filter_parser import create_filter
+logger = logging.getLogger()
+@click.command(
+    "csv-filter",
+    help="""
+    Filter the input csv files.
+    The filter expression can be a combination of simple
+    arithmatic and logical expressions that will evaluate to
+    True or False, with a numerical answer being True if
+    non-zero.  Fieldnames can be used as variables in
+    this expression.
+    For example 'age + 1 < 4' would check each row and
+    see if row['age'] + 1 is less than 4, and keep or discard
+    rows for which the expression is True.
+    \b
+    Available operators are:
+    +, -, *, /, %, //, ==, !=, <, <=, >, >=, not, and, or
+    String literals can be specified using single quotes.
+    Field names with spaces should be surrounded by double quotes.
+    If no csv_files are specified, read from stdin.
+    If no --output specified, write to stdout.
+    Input files do not all have to have the same columns.
+    The output will have all columns.
+    To achieve this all csv_files are opened at program initiation.
+    This may cause problems with your system's open file limit if
+    you are attempting to filter a large number of files at once.
+    """,
+)
+@click.argument(
+    "filter_expression",
+    type=str,
+    required=True,
+)
+@click.argument(
+    "csv_files",
+    type=click.Path(exists=True),
+    nargs=-1,
+    metavar="csv_file",
+    required=False,
+)
+@click.option(
+    "--keep/--discard",
+    is_flag=True,
+    help="keep or discard entries for which the expression is true (default=keep)",
+    default=True,
+)
+@click.option(
+    "--output",
+    type=click.Path(exists=False),
+    help="output file name",
+)
+@click.option(
+    "-d",
+    "--delimiter",
+    type=str,
+    help="column delimiter (default=COMMA)",
+    default=",",
+)
+@click.option(
+    "--output-delimiter",
+    type=str,
+    help="output column delimiter (default=input delimiter)",
+)
+@click.version_option(__version__, message=__version_message__)
+@click_logging.simple_verbosity_option(logger)
+def cli(  # noqa: C901
+    filter_expression: str,
+    csv_files: Tuple[click.Path, ...] = (),
+    keep: bool = True,
+    output: click.Path | None = None,
+    delimiter: str = ",",
+    output_delimiter: str | None = None,
+) -> None:
+    """
+    Copy the rows selected by ``filter_expression`` to the output.
+
+    The expression is compiled once with ``create_filter``; a row is written
+    when the compiled predicate's result equals ``keep``.  The output header
+    is the union of all input headers in first-seen order.
+
+    :param filter_expression: expression evaluated against every row
+    :param csv_files: input files; stdin is read when none are given
+    :param keep: write matching rows when True, non-matching rows when False
+    :param output: output file name; stdout is used when None
+    :param delimiter: column delimiter of the input
+    :param output_delimiter: column delimiter of the output
+        (defaults to ``delimiter``)
+    """
+    # Local import: keeps the module-level import block untouched.
+    from contextlib import ExitStack
+    if output_delimiter is None:
+        output_delimiter = delimiter
+    filter_function = create_filter(filter_expression)
+    readers = []
+    # A dict keeps first-seen order and rejects duplicates in O(1).
+    fieldnames: dict[str, None] = {}
+    # Only files opened here are registered for closing; the process-wide
+    # sys.stdin / sys.stdout streams are deliberately left open.
+    with ExitStack() as stack:
+        if output is None:  # pragma: no cover
+            output_fh = sys.stdout
+        else:
+            # newline="" lets the csv module control line endings itself.
+            output_fh = stack.enter_context(open(str(output), "w", newline=""))
+        if csv_files:
+            # Every input is opened before any row is read because the
+            # combined header has to be written first.
+            for csv_file in csv_files:
+                input_fh = stack.enter_context(open(str(csv_file), newline=""))
+                reader = DictReader(input_fh, delimiter=delimiter)
+                fieldnames.update(dict.fromkeys(reader.fieldnames or ()))
+                readers.append(reader)
+        else:  # pragma: no cover
+            # stdin honours --delimiter exactly like named files do.
+            reader = DictReader(sys.stdin, delimiter=delimiter)
+            fieldnames.update(dict.fromkeys(reader.fieldnames or ()))
+            readers.append(reader)
+        writer = DictWriter(
+            output_fh, fieldnames=list(fieldnames), delimiter=output_delimiter
+        )
+        writer.writeheader()
+        for reader in readers:
+            for row in reader:
+                if filter_function(row) == keep:
+                    writer.writerow(row)

{malcolm3utils-0.5.6 → malcolm3utils-0.6.0}/src/malcolm3utils/scripts/getcol.py RENAMED Viewed

@@ -10,16 +10,14 @@ import click
 from .. import __version__, __version_message__
-@click.command(
-    help="""
+@click.command(help="""
 Read the specified file and write out just the specified columns to stdout.
 The column_spec is a comma separated list of column headers, column indexes (one-based),
 or column ranges (e.g. 4-6 for columns 4 through 6 inclusive).
 If no file_to_read is specified, then input is read from stdin.
-"""
-)
+""")
 @click.option(
     "-d", "--delimiter", type=str, help="column delimiter (default=TAB)", default="\t"
 )

{malcolm3utils-0.5.6 → malcolm3utils-0.6.0}/src/malcolm3utils/scripts/merge.py RENAMED Viewed

@@ -14,8 +14,7 @@ logger = logging.getLogger(__name__)
 click_logging.basic_config(logger)
-@click.command(
-    help="""
+@click.command(help="""
 Merge the specified delimited files with column headings, joining entries with
 the same key field value.
@@ -34,8 +33,7 @@ header will be the header from the first file.
 If -k is used to specify alternative keys columns for subsequent files, but
 those files have a column with the same name as the output key column, that
 will be ignored.
-"""
-)
+""")
 @click_logging.simple_verbosity_option(logger)
 @click.option(
     "-d", "--delimiter", type=str, help="column delimiter (default=TAB)", default="\t"

malcolm3utils-0.6.0/src/malcolm3utils/utils/__init__.py ADDED Viewed

File without changes

malcolm3utils-0.6.0/src/malcolm3utils/utils/csv.py ADDED Viewed

@@ -0,0 +1,77 @@
+import logging  # noqa: A005
+from pathlib import Path
+from typing import Any, Callable, Hashable
+import pandas as pd
+logger = logging.getLogger(__name__)
+def read_keyed_csv_data(
+    csv_file: Path,
+    keyfield: str,
+    skiprows: list[int] | int | Callable[[Hashable], bool] | None = None,
+    multiple: bool = False,
+) -> dict[Any, dict[str, Any]] | dict[Any, list[dict[str, Any]]]:
+    """
+    Read a CSV file into a dictionary keyed by the value of ``keyfield``.
+
+    Rows are loaded through ``read_csv_data`` (pandas based) instead of
+    DictReader, which would import every value as a string.  The result is
+    keyed rather than a plain list because entries may need to be looked up
+    by key.  Because MDBs do not support boolean values, boolean columns
+    are converted to integer 0/1 by ``read_csv_data``.
+
+    Skiprows can be
+        - a list of 0-based line indexes to skip
+        - an integer giving the number of initial lines to skip
+        - a callable that takes the line index and returns True to skip that line
+        - None to skip no rows (default)
+
+    If multiple is true, each key maps to the list of all rows carrying that
+    key, in file order.  Otherwise each key maps to a single row and a later
+    row replaces an earlier one with the same key.
+
+    :param csv_file: CSV file to be read.
+    :param keyfield: Field to use as the key in the returned dictionary.
+    :param skiprows: rows to skip
+    :param multiple: indicates there may be multiple rows for each key
+    :return: keyed dictionary of each row of data.
+    """
+    # skiprows is forwarded in both modes; it must not depend on ``multiple``.
+    entries = read_csv_data(csv_file, skiprows=skiprows)
+    if not multiple:
+        return {entry[keyfield]: entry for entry in entries}
+    grouped: dict[Any, list[dict[str, Any]]] = {}
+    for entry in entries:
+        grouped.setdefault(entry[keyfield], []).append(entry)
+    return grouped
+def read_csv_data(
+    csv_file: Path,
+    skiprows: list[int] | int | Callable[[Hashable], bool] | None = None,
+) -> list[dict[str, Any]]:
+    """
+    Use Pandas to read a CSV into a simple list of dictionaries.
+
+    Boolean columns are converted to integer 0/1 before the rows are
+    returned.  Rows can be filtered by specifying a skiprows option.
+
+    Skiprows can be
+        - a list of 0-based line indexes to skip
+        - an integer giving the number of initial lines to skip
+        - a callable that takes the line index and returns True to skip that line
+        - None to skip no rows (default)
+
+    :param csv_file: file to be read
+    :param skiprows: rows to skip
+    :return: list of dictionary entries, one per row, in file order
+    """
+    logger.debug('...............reading CSV data from "%s"', csv_file)
+    frame = pd.read_csv(str(csv_file), skiprows=skiprows)
+    for column in frame.select_dtypes("bool").columns:
+        frame[column] = frame[column].astype(int)
+    # orient="records" converts row by row and keeps each column's own type.
+    # Transposing first would force one common dtype on the whole frame and
+    # turn integer columns into floats whenever another column is float.
+    return frame.to_dict(orient="records")

malcolm3utils-0.6.0/src/malcolm3utils/utils/filter_parser.py ADDED Viewed

@@ -0,0 +1,177 @@
+import logging
+from typing import Any, Generator
+from lark import Lark, Transformer, v_args
+logger = logging.getLogger(__name__)
+# Lark grammar for filter expressions.
+# Precedence, loosest to tightest: or, and, not, comparison, +/-, * / % //,
+# atom.  A bare arithmetic value used as a test is routed through num_test.
+# Atoms are numbers, single-quoted string literals, unary minus, field names
+# (CNAME or ESCAPED_STRING) and parenthesised sums.
+# Each "-> name" alias selects the FilterParser method of the same name.
+# Inline whitespace is ignored.
+filter_grammar = """
+    ?start: or_test_
+    ?or_test_: and_test_ ("or" and_test_)*    -> or_test
+    ?and_test_: not_test_ ("and" not_test_)*  -> and_test
+    ?not_test_: "not" not_test_               -> not_test
+              | "True"                        -> true
+              | "False"                       -> false
+              | sum                           -> num_test
+              | comp
+              | "(" or_test_ ")"
+    ?comp: sum "==" sum     -> eq
+         | sum "!=" sum     -> ne
+         | sum ">" sum      -> gt
+         | sum ">=" sum     -> ge
+         | sum "<" sum      -> lt
+         | sum "<=" sum     -> le
+         | "(" comp ")"
+    ?sum: product
+        | sum "+" product   -> add
+        | sum "-" product   -> sub
+    ?product: atom
+        | product "*" atom  -> mul
+        | product "/" atom  -> div
+        | product "%" atom  -> mod
+        | product "//" atom -> floordiv
+    ?atom: NUMBER           -> number
+         | STRING_LITERAL   -> strlit
+         | "-" atom         -> neg
+         | key              -> key
+         | "(" sum ")"
+    ?key: CNAME | ESCAPED_STRING
+    STRING_LITERAL: "'" _STRING_ESC_INNER "'"
+    %import common.ESCAPED_STRING
+    %import common._STRING_ESC_INNER
+    %import common.CNAME
+    %import common.NUMBER
+    %import common.WS_INLINE
+    %ignore WS_INLINE
+"""
+def to_number_or_string(s: Any) -> float | int | str:
+    """
+    Coerce a value to the most specific of int, float or str.
+
+    Numbers are returned unchanged.  Strings are parsed as int first, then
+    as float, and returned unchanged when neither parse succeeds.  Any other
+    object is converted with ``str``.
+    """
+    if isinstance(s, (float, int)):
+        return s
+    if not isinstance(s, str):
+        return str(s)
+    # Try the narrower numeric type first so "3" stays an int.
+    for convert in (int, float):
+        try:
+            return convert(s)
+        except ValueError:
+            continue
+    return s
+def applyall(d: dict[str, Any], *args) -> Generator[bool, None, None]:  # type: ignore[no-untyped-def]
+    """
+    Lazily yield the result of calling each callable in ``args`` with ``d``.
+
+    Evaluation happens one callable at a time, so a consumer such as
+    ``any`` or ``all`` stops calling as soon as it has its answer.
+    """
+    yield from (predicate(d) for predicate in args)
+@v_args(inline=True)
+class FilterParser(Transformer):
+    """
+    Lark transformer that compiles a parsed filter expression into closures.
+
+    Every method receives the already-transformed children of a grammar
+    rule and returns a callable taking the row dictionary ``d``.  Nothing is
+    evaluated while parsing; work happens when the returned callable is
+    invoked with a row.
+    """
+    # making methods static breaks the @v_args functionality
+    # --- logical tests -----------------------------------------------------
+    def or_test(self, *args):  # type: ignore[no-untyped-def]
+        logger.debug("Or test: %s", args)
+        return lambda d: any(applyall(d, *args))
+    def and_test(self, *args):  # type: ignore[no-untyped-def]
+        logger.debug("And test: %s", args)
+        return lambda d: all(applyall(d, *args))
+    def not_test(self, a):  # type: ignore[no-untyped-def]
+        logger.debug("Not test: %s", a)
+        return lambda d: not a(d)
+    def num_test(self, a):  # type: ignore[no-untyped-def]
+        # A bare value used as a test is true when it differs from zero.
+        logger.debug("Num test: %s", a)
+        return lambda d: a(d) != 0
+    # --- comparisons -------------------------------------------------------
+    def eq(self, a, b):  # type: ignore[no-untyped-def]
+        logger.debug("Eq test: %s and %s", a, b)
+        return lambda d: a(d) == b(d)
+    def ne(self, a, b):  # type: ignore[no-untyped-def]
+        logger.debug("Ne test: %s and %s", a, b)
+        return lambda d: a(d) != b(d)
+    def gt(self, a, b):  # type: ignore[no-untyped-def]
+        logger.debug("Gt test: %s and %s", a, b)
+        return lambda d: a(d) > b(d)
+    def ge(self, a, b):  # type: ignore[no-untyped-def]
+        logger.debug("Ge test: %s and %s", a, b)
+        return lambda d: a(d) >= b(d)
+    def lt(self, a, b):  # type: ignore[no-untyped-def]
+        logger.debug("Lt test: %s and %s", a, b)
+        return lambda d: a(d) < b(d)
+    def le(self, a, b):  # type: ignore[no-untyped-def]
+        logger.debug("Le test: %s and %s", a, b)
+        return lambda d: a(d) <= b(d)
+    # --- arithmetic --------------------------------------------------------
+    def add(self, a, b):  # type: ignore[no-untyped-def]
+        logger.debug("add: %s and %s", a, b)
+        return lambda d: a(d) + b(d)
+    def sub(self, a, b):  # type: ignore[no-untyped-def]
+        logger.debug("sub: %s and %s", a, b)
+        return lambda d: a(d) - b(d)
+    def mul(self, a, b):  # type: ignore[no-untyped-def]
+        logger.debug("mul: %s and %s", a, b)
+        return lambda d: a(d) * b(d)
+    def div(self, a, b):  # type: ignore[no-untyped-def]
+        logger.debug("div: %s and %s", a, b)
+        return lambda d: a(d) / b(d)
+    def mod(self, a, b):  # type: ignore[no-untyped-def]
+        logger.debug("mod: %s and %s", a, b)
+        return lambda d: a(d) % b(d)
+    def floordiv(self, a, b):  # type: ignore[no-untyped-def]
+        logger.debug("floordiv: %s and %s", a, b)
+        return lambda d: a(d) // b(d)
+    def neg(self, a):  # type: ignore[no-untyped-def]
+        logger.debug("Negation: %s", a)
+        return lambda d: -a(d)
+    # --- atoms -------------------------------------------------------------
+    def number(self, value):  # type: ignore[no-untyped-def]
+        logger.debug("Number: %s", value)
+        # The token is constant, so convert it once here instead of once
+        # per evaluated row.
+        constant = to_number_or_string(value)
+        return lambda d: constant
+    def strlit(self, v):  # type: ignore[no-untyped-def]
+        logger.debug("String: %s", v)
+        # STRING_LITERAL always starts and ends with one single quote.
+        # Slicing removes exactly those delimiters; strip() would also eat
+        # an escaped quote at either end of the content.
+        vv = str(v)[1:-1]
+        return lambda d: vv
+    def key(self, a):  # type: ignore[no-untyped-def]
+        logger.debug("Key: %s", a)
+        name = str(a)
+        # Only the double-quoted form carries delimiters; a CNAME cannot
+        # start with a quote and is used as written.
+        b = name[1:-1] if name.startswith('"') else name
+        return lambda d: to_number_or_string(d[b])
+    def true(self):  # type: ignore[no-untyped-def]
+        logger.debug("True")
+        return lambda d: True
+    def false(self):  # type: ignore[no-untyped-def]
+        logger.debug("False")
+        return lambda d: False
+def create_filter(filter_spec: str):  # type: ignore[no-untyped-def]
+    """
+    Compile an expression string into a row predicate.
+
+    The expression is parsed with ``filter_grammar`` using an LALR parser
+    whose transformer is a ``FilterParser``; the parse result is returned
+    directly.
+
+    :param filter_spec: filter expression text
+    :return: function that takes a dictionary as an argument and returns a
+        boolean
+    """
+    parser = Lark(filter_grammar, parser="lalr", transformer=FilterParser())
+    return parser.parse(filter_spec)

{malcolm3utils-0.5.6 → malcolm3utils-0.6.0}/LICENCE RENAMED Viewed

File without changes

{malcolm3utils-0.5.6 → malcolm3utils-0.6.0}/src/malcolm3utils/py.typed RENAMED Viewed

File without changes

{malcolm3utils-0.5.6 → malcolm3utils-0.6.0}/src/malcolm3utils/scripts/touch_latest.py RENAMED Viewed

File without changes

malcolm3utils 0.5.6__tar.gz → 0.6.0__tar.gz

malcolm3utils 0.5.6tar.gz → 0.6.0tar.gz