PyPI - csvpath - Versions diffs - 0.0.463__tar.gz → 0.0.465__tar.gz - Mend

csvpath-0.0.463.tar.gz → csvpath-0.0.465.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (196)

{csvpath-0.0.463 → csvpath-0.0.465}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: csvpath
-Version: 0.0.463
+Version: 0.0.465
 Summary: A declarative language for data extraction and validation of CSV files
 Author: David Kershaw
 Author-email: dk107dk@hotmail.com
@@ -25,6 +25,7 @@ Requires-Dist: lark (>=1.2.2,<2.0.0)
 Requires-Dist: pandas (>=2.2.2,<3.0.0)
 Requires-Dist: ply (>=3.11,<4.0)
 Requires-Dist: python-dateutil (>=2.9.0.post0,<3.0.0)
+Requires-Dist: tabulate (>=0.9.0,<0.10.0)
 Project-URL: Csvpath.org, https://www.csvpath.org
 Project-URL: Github, https://github.com/dk107dk/csvpath
 Description-Content-Type: text/markdown
@@ -34,11 +35,6 @@ Description-Content-Type: text/markdown
 CsvPath defines a declarative syntax for inspecting and validating CSV files.
-Though much simpler, it is inspired by:
-- XPath. CsvPath is to CSV files like XPath is to XML files.
-- Validation of XML using <a href='https://schematron.com/'>Schematron rules</a>
-- The way CSS selectors pick out HTML structures
 CsvPath' goal is to make it easy to:
 - Analyze the content and structure of a CSV
 - Validate that the file matches expectations
@@ -47,6 +43,11 @@ CsvPath' goal is to make it easy to:
 And do it all in an automation-friendly way.
+Though much simpler, it is inspired by:
+- XPath. CsvPath is to CSV files like XPath is to XML files.
+- Validation of XML using <a href='https://schematron.com/'>Schematron rules</a>
+- The way CSS selectors pick out HTML structures
 CsvPath is intended to fit with other DataOps and data quality tools. Files are streamed. The interface is simple. New functions are easy to create.
 Read more about CsvPath and see realistic CSV validation examples at <a href='https://www.csvpath.org'>csvpath.org</a>.

{csvpath-0.0.463 → csvpath-0.0.465}/csvpath/csvpath.py RENAMED Viewed

@@ -377,7 +377,7 @@ class CsvPath(CsvPathPublic, ErrorCollector):  # pylint: disable=R0902, R0904
         # CsvPaths will do this earlier but it stripped off
         # the comments so we won't find them again
         #
-        csvpath = MetadataParser().extract_metadata(instance=self, csvpath=csvpath)
+        csvpath = MetadataParser(self).extract_metadata(instance=self, csvpath=csvpath)
         #
         #
         #
@@ -436,7 +436,7 @@ class CsvPath(CsvPathPublic, ErrorCollector):  # pylint: disable=R0902, R0904
                 len(np),
             )
         path = np[0]
-        path = MetadataParser().extract_metadata(instance=self, csvpath=path)
+        path = MetadataParser(self).extract_metadata(instance=self, csvpath=path)
         path = self._update_file_path(path)
         dis = self.parse(path, disposably=disposably)
         if disposably is True:

{csvpath-0.0.463 → csvpath-0.0.465}/csvpath/csvpaths.py RENAMED Viewed

@@ -185,11 +185,14 @@ class CsvPaths(CsvPathsPublic):
         )
     def _load_csvpath(self, csvpath: CsvPath, path: str, file: str) -> None:
+        self.logger.debug("Beginning to load csvpath %s with file %s", path, file)
         # we strip comments from above the path so we need to extract them first
-        path = MetadataParser().extract_metadata(instance=csvpath, csvpath=path)
-        # csvpath._extract_metadata(path)
+        path = MetadataParser(self).extract_metadata(instance=csvpath, csvpath=path)
+        self.logger.debug("Csvpath after metadata extract: %s", path)
         f = path.find("[")
+        self.logger.debug("Csvpath matching part starts at char # %s", f)
         apath = f"${file}{path[f:]}"
+        self.logger.info("Parsing csvpath %s", apath)
         csvpath.parse(apath)
     def fast_forward_paths(self, *, pathsname, filename):
@@ -204,17 +207,25 @@ class CsvPaths(CsvPathsPublic):
         self.logger.info("Cleaning out any %s and %s results", filename, pathsname)
         self.clean(paths=pathsname)
         self.logger.info(
-            "Beginning fast_forward_paths %s with %s paths", pathsname, len(paths)
+            "Beginning fast_forward_paths %s with %s paths against file %s",
+            pathsname,
+            len(paths),
+            filename,
         )
         for i, path in enumerate(paths):
             csvpath = self.csvpath()
+            self.logger.info("Beginning CsvPath instance: %s", csvpath)
             result = CsvPathResult(
                 csvpath=csvpath, file_name=filename, paths_name=pathsname
             )
             try:
                 self.results_manager.add_named_result(result)
                 self._load_csvpath(csvpath, path=path, file=file)
-                self.logger.info("Parsed csvpath %s pointed at %s", i, file)
+                self.logger.info(
+                    "Parsed csvpath %s pointed at %s and starting to fast-forward",
+                    i,
+                    file,
+                )
                 csvpath.fast_forward()
                 self.logger.info(
                     "Completed fast forward of csvpath %s against %s", i, file

{csvpath-0.0.463 → csvpath-0.0.465}/csvpath/managers/csvpaths_manager.py RENAMED Viewed

@@ -26,7 +26,7 @@ class CsvPathsManager(ABC):
         contents of the file is straight cvspath, not json."""
     @abstractmethod
-    def set_named_paths_from_json(self, file_path: str) -> None:
+    def add_named_paths_from_json(self, file_path: str) -> None:
         """replaces the named paths dict with a dict found in a JSON file. lists
         of paths are keyed by names."""
@@ -101,6 +101,7 @@ class PathsManager(CsvPathsManager):  # pylint: disable=C0115, C0116
             ErrorHandler(self.csvpaths).handle_error(ie)
     def add_named_paths_from_file(self, *, name: str, file_path: str) -> None:
+        self.csvpaths.logger.debug("Reading csvpaths file at %s", file_path)
         with open(file_path, "r", encoding="utf-8") as f:
             cp = f.read()
             _ = [
@@ -108,12 +109,15 @@ class PathsManager(CsvPathsManager):  # pylint: disable=C0115, C0116
                 for apath in cp.split(PathsManager.MARKER)
                 if apath.strip() != ""
             ]
+            self.csvpaths.logger.debug("Found %s csvpaths in file", len(_))
             self.add_named_paths(name, _)
-    def set_named_paths_from_json(self, file_path: str) -> None:
+    def add_named_paths_from_json(self, file_path: str) -> None:
         try:
+            self.csvpaths.logger.debug("Opening JSON file at %s", file_path)
             with open(file_path, encoding="utf-8") as f:
                 j = json.load(f)
+                self.csvpaths.logger.debug("Found JSON file with %s keys", len(j))
                 for k in j:
                     v = j[k]
                     for f in v:
@@ -130,14 +134,24 @@ class PathsManager(CsvPathsManager):  # pylint: disable=C0115, C0116
                                  set_named_paths_from_json."""
             )
             ErrorHandler(self.csvpaths).handle_error(ie)
+        self.csvpaths.logger.debug("Adding csvpaths to named-paths group %s", name)
         if name in self.named_paths:
             for p in paths:
                 if p in self.named_paths[name]:
+                    self.csvpaths.logger.debug(
+                        "csvpaths %s already exists in named-paths group %s", p, name
+                    )
                     pass
                 else:
-                    self.named_paths[name].append(paths)
+                    self.csvpaths.logger.debug("Adding %s to %s", p, name)
+                    if isinstance(self.named_paths[name], str):
+                        ps = []
+                        ps.append(self.named_paths[name])
+                        self.named_paths[name] = ps
+                    self.named_paths[name].append(p)
         else:
+            for _ in paths:
+                self.csvpaths.logger.debug("Adding %s to %s", _, name)
             self.named_paths[name] = paths
     #

{csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/boolean/all.py RENAMED Viewed

@@ -4,7 +4,7 @@ from typing import Any
 from csvpath.matching.productions import Equality
 from csvpath.matching.util.exceptions import ChildrenException
 from ..function_focus import MatchDecider
-from ..misc.variables import Variables
+from ..variables.variables import Variables
 from ..headers.headers import Headers

{csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/boolean/any.py RENAMED Viewed

@@ -2,7 +2,7 @@
 from csvpath.matching.productions import Equality, Term
 from csvpath.matching.util.exceptions import ChildrenException
 from csvpath.matching.util.expression_utility import ExpressionUtility
-from ..misc.variables import Variables
+from ..variables.variables import Variables
 from ..function_focus import MatchDecider
 from ..headers.headers import Headers

{csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/counting/tally.py RENAMED Viewed

@@ -18,18 +18,24 @@ class Tally(ValueProducer):
         else:
             siblings = [child]
         tally = ""
         for _ in siblings:
             tally += f"{_.to_value(skip=skip)}|"
             value = f"{_.to_value(skip=skip)}"
             self._store(_.name, value)
         if len(siblings) > 1:
             self._store(
-                self.first_non_term_qualifier("tally"),
+                "",  # we don't need to pass a name. this data just
+                # goes under "tally" or the qualifier
                 tally[0 : len(tally) - 1],
             )
         self.value = True
     def _store(self, name, value):
+        if name == "":
+            name = self.first_non_term_qualifier("tally")
+        else:
+            name = f"""{self.first_non_term_qualifier("tally")}_{name}"""
         count = self.matcher.get_variable(name, tracking=value)
         if count is None:
             count = 0

{csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/function_factory.py RENAMED Viewed

@@ -1,12 +1,15 @@
 # pylint: disable=C0114
 from csvpath.matching.productions.expression import Matchable
 from .function import Function
+from .dates.now import Now
+from .dates.datef import Date
 from .strings.lower import Lower
 from .strings.upper import Upper
 from .strings.substring import Substring
 from .strings.starts_with import StartsWith
 from .strings.strip import Strip
 from .strings.length import Length, MinMaxLength
+from .strings.regex import Regex
 from .strings.concat import Concat
 from .strings.metaphone import Metaphone
 from .counting.count import Count
@@ -21,6 +24,8 @@ from .counting.increment import Increment
 from .headers.reset_headers import ResetHeaders
 from .headers.header_name import HeaderName
 from .headers.header_names_mismatch import HeaderNamesMismatch
+from .headers.collect import Collect
+from .headers.replace import Replace
 from .headers.headers import Headers
 from .headers.mismatch import Mismatch
 from .headers.end import End
@@ -50,6 +55,7 @@ from .stats.percent_unique import PercentUnique
 from .stats.stdev import Stdev
 from .stats.correlate import Correlate
 from .print.printf import Print
+from .print.table import HeaderTable, RowTable, VarTable
 from .print.print_line import PrintLine
 from .print.jinjaf import Jinjaf
 from .print.print_queue import PrintQueue
@@ -60,20 +66,15 @@ from .lines.dups import HasDups, DupLines, CountDups
 from .lines.first_line import FirstLine
 from .lines.advance import Advance
 from .lines.after_blank import AfterBlank
+from .variables.variables import Variables
+from .variables.pushpop import Push, PushDistinct, Pop, Peek, PeekSize, Stack
+from .variables.get import Get
+from .variables.track import Track
 from .misc.random import Random
-from .misc.regex import Regex
-from .misc.now import Now
-from .misc.variables import Variables
 from .misc.nonef import Nonef
-from .misc.pushpop import Push, PushDistinct, Pop, Peek, PeekSize, Stack
-from .misc.datef import Date
-from .misc.collect import Collect
-from .misc.replace import Replace
 from .misc.intf import Int
-from .misc.get import Get
-from .misc.track import Track
 from .misc.importf import Import
-from .misc.debug import Debug, BriefStackTrace, VoteStack, DoWhenStack
+from .testing.debug import Debug, BriefStackTrace, VoteStack, DoWhenStack
 from .validity.failed import Failed
 from .validity.fail import Fail
@@ -176,7 +177,14 @@ class FunctionFactory:
             f = AboveBelow(matcher, name, child)
         elif name == "first":
             f = First(matcher, name, child)
-        elif name in ["firstline", "firstmatch", "firstscan"]:
+        elif name in [
+            "firstline",
+            "firstmatch",
+            "firstscan",
+            "first_line",
+            "first_scan",
+            "first_match",
+        ]:
             f = FirstLine(matcher, name, child)
         elif name == "count_lines":
             f = CountLines(matcher, name, child)
@@ -337,6 +345,12 @@ class FunctionFactory:
             f = DoWhenStack(matcher, name, child)
         elif name == "metaphone":
             f = Metaphone(matcher, name, child)
+        elif name == "header_table":
+            f = HeaderTable(matcher, name, child)
+        elif name == "row_table":
+            f = RowTable(matcher, name, child)
+        elif name == "var_table":
+            f = VarTable(matcher, name, child)
         else:
             if (
                 f is None

{csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/functions/lines/first_line.py RENAMED Viewed

@@ -22,16 +22,16 @@ class FirstLine(MatchDecider):
     def _decide_match(self, skip=None) -> None:
         t = self.name
-        if t == "firstmatch":
+        if t in ["firstmatch", "first_match"]:
             if self.matcher.csvpath.match_count == 0 and self.line_matches():  # 1-based
                 self.match = True
             else:
                 self.match = False
-        elif t == "firstscan":
+        elif t in ["firstscan", "first_scan"]:
             self.match = (
                 self.matcher.csvpath.scan_count == 1
             )  # 1-based, set before matcher is called.
-        elif t == "firstline":
+        elif t in ["firstline", "first_line"]:
             self.match = (
                 self.matcher.csvpath.line_monitor.data_line_number == 0
             )  # 0-based, updated after matcher is called.

csvpath-0.0.465/csvpath/matching/functions/print/table.py ADDED Viewed

@@ -0,0 +1,133 @@
+# pylint: disable=C0114
+import textwrap
+from tabulate import tabulate
+from ..function_focus import SideEffect
+class HeaderTable(SideEffect):
+    """prints a header table"""
+    def check_valid(self) -> None:
+        self.validate_zero_args()
+        super().check_valid()
+    def _produce_value(self, skip=None) -> None:
+        self.value = self.matches(skip=skip)
+    def _decide_match(self, skip=None) -> None:
+        table = []
+        headers = ["#N", "#Name"]
+        for i, h in enumerate(self.matcher.csvpath.headers):
+            table.append([i, h])
+        self.matcher.csvpath.print(
+            tabulate(table, headers=headers, tablefmt="simple_grid")
+        )
+        self.match = self.default_match()
+class RowTable(SideEffect):
+    """prints a row table"""
+    def check_valid(self) -> None:
+        self.validate_zero_one_or_two_args()
+        super().check_valid()
+    def _produce_value(self, skip=None) -> None:
+        self.value = self.matches(skip=skip)
+    def _decide_match(self, skip=None) -> None:
+        v1 = self._value_one()
+        v2 = self._value_two()
+        i = -1
+        j = -1
+        if v1 is None and v2 is None:
+            i = 0
+            j = len(self.matcher.csvpath.headers)
+        elif v2 is None:
+            i = v1
+            j = i
+        else:
+            i = v1
+            j = v2
+        headers = []
+        row = None
+        print(f"tables.i: {i}, {j}")
+        if i == j:
+            headers.append(self.matcher.csvpath.headers[i])
+            row = [[self.matcher.line[i]]]
+        else:
+            for k, h in enumerate(self.matcher.csvpath.headers[i : j + 1]):
+                headers.append(f"#{h} (#{k + i})")
+            row = [self.matcher.line[i : j + 1]]
+        self.matcher.csvpath.print(
+            tabulate(row, headers=headers, tablefmt="simple_grid")
+        )
+        self.match = self.default_match()
+class VarTable(SideEffect):
+    """prints a variables table"""
+    def check_valid(self) -> None:
+        self.validate_zero_or_more_args()
+        super().check_valid()
+    def _produce_value(self, skip=None) -> None:
+        self.value = self.matches(skip=skip)
+    def _decide_match(self, skip=None) -> None:
+        v1 = self._value_one()
+        v2 = self._value_two()
+        if v1 is None:
+            self.print_all_vars()
+        elif v2 is None:
+            self.print_one_var()
+        else:
+            self.print_some_vars(skip)
+        self.match = self.default_match()
+    def print_all_vars(self):
+        headers = []
+        rows = [[]]
+        for k, v in self.matcher.csvpath.variables.items():
+            headers.append(k)
+            v = str(v)
+            if len(v) > 20:
+                v = textwrap.fill(v, width=20)
+            rows[0].append(v)
+        self.matcher.csvpath.print(
+            tabulate(rows, headers=headers, tablefmt="simple_grid")
+        )
+    def print_one_var(self):
+        h = self._value_one()
+        headers = [h]
+        rows = []
+        v = self.matcher.csvpath.variables[h]
+        if isinstance(v, list):
+            for a in v:
+                rows.append([a])
+        elif isinstance(v, dict):
+            headers.append("Tracking")
+            for k, _ in v.items():
+                rows.append([k, _])
+        self.matcher.csvpath.print(
+            tabulate(rows, headers=headers, tablefmt="simple_grid")
+        )
+    def print_some_vars(self, skip):
+        siblings = self[0].commas_to_list()
+        headers = []
+        for s in siblings:
+            headers.append(s.to_value(skip=skip))
+        rows = []
+        for h in headers:
+            v = self.matcher.csvpath.variables[h]
+            v = f"{v}"
+            if len(v) > 30:
+                v = textwrap.fill(v, width=30)
+            rows.append([v])
+        self.matcher.csvpath.print(
+            tabulate(rows, headers=headers, tablefmt="simple_grid")
+        )

{csvpath-0.0.463 → csvpath-0.0.465}/csvpath/matching/productions/equality.py RENAMED Viewed

@@ -88,10 +88,13 @@ class Equality(Matchable):
     def commas_to_list(self) -> List[Any]:
         """gets the children of op==',' equalities as a list of args"""
+        """
         ls = []
         for _ in self.children:
             ls.append(_)
         return ls
+        """
+        return self.children[:]
     def set_operation(self, op):  # pylint: disable=C0116
         self.op = op

{csvpath-0.0.463 → csvpath-0.0.465}/csvpath/util/metadata_parser.py RENAMED Viewed

@@ -1,7 +1,46 @@
+from typing import Tuple
+from .config_exception import ConfigurationException
+from .exceptions import InputException
 class MetadataParser:
+    def __init__(self, csvpath) -> None:
+        if not hasattr(csvpath, "logger"):
+            raise ConfigurationException(
+                "Log holder cannot be Nothing. You must pass a CsvPaths or CsvPath to MetadataParser."
+            )
+        self.log_holder = csvpath
     def extract_metadata(self, *, instance, csvpath: str) -> str:
-        """extracts metadata from comments. the comments are removed."""
+        """extracts metadata from a comment. the comment is removed.
+        at this time we're expecting 0 or 1 comments above the csvpath.
+        we do not look below or for secondary comments. both would
+        cause errors. we are also not looking within the csvpath. that
+        is handled by the matcher's parser and we do not collect
+        metadata from internal comments at this time. in principle we
+        could run the comments the matching parser finds through this
+        parser in order to extract metadata fields. not today's problem
+        though.
+        """
+        self.log_holder.logger.debug(
+            "Beginning to extract metadata from csvpath: %s", csvpath
+        )
+        csvpath = csvpath.strip()
+        if not csvpath[0] in ["$", "~"]:
+            raise InputException(f"Csvpath must start with ~ or $, not {csvpath[0]}")
+        csvpath2, comment = self.extract_csvpath_and_comment(csvpath)
+        comment = comment.strip()
+        # if there are any characters in the comment we should parse. 3 is
+        # the minimum metadata, because "x:y", but there could be a number or something.
+        if len(comment) > 0:
+            self.collect_metadata(instance, comment)
+            # keep the original comment for future ref
+            if not instance.metadata:
+                instance.metadata = {}
+            instance.metadata["original_comment"] = comment
+        return csvpath2
+    def extract_csvpath_and_comment(self, csvpath) -> Tuple[str, str]:
         csvpath2 = ""
         comment = ""
         state = 0  # 0 == outside, 1 == outer comment, 2 == inside
@@ -37,7 +76,9 @@ class MetadataParser:
                     comment += c
                 elif state == 2:
                     csvpath2 += c
+        return csvpath2, comment
+    def collect_metadata(self, instance, comment) -> None:
         #
         # pull the metadata out of the comment
         #
@@ -69,13 +110,15 @@ class MetadataParser:
                         metafield += c
                 current_word = ""
             else:
+                if metafield is not None:
+                    metafield += c
                 current_word = ""
         if metaname:
             metadata_fields[metaname] = (
                 metafield.strip() if metafield is not None else None
             )
-        if len(metadata_fields) > 0:
-            instance.metadata = metadata_fields
-        return csvpath2
+        # add found metadata to instance. keys will overwrite preexisting.
+        if not instance.metadata:
+            instance.metadata = {}
+        for k, v in metadata_fields.items():
+            instance.metadata[k] = v

{csvpath-0.0.463 → csvpath-0.0.465}/csvpath/util/printer.py RENAMED Viewed

@@ -57,6 +57,8 @@ class StdOutPrinter(Printer):
 class TestPrinter(Printer):
+    __test__ = False
     def __init__(self):
         self.lines = []

{csvpath-0.0.463 → csvpath-0.0.465}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "csvpath"
-version = "0.0.463"
+version = "0.0.465"
 description = "A declarative language for data extraction and validation of CSV files"
 authors = ["David Kershaw <dk107dk@hotmail.com>"]
 readme = "README.md"
@@ -32,6 +32,7 @@ jinja2 = "^3.1.4"
 inflect = "^7.3.1"
 lark = "^1.2.2"
 jellyfish = "^1.1.0"
+tabulate = "^0.9.0"
 [tool.poetry.group.dev.dependencies]
 flake8 = "^7.1.0"

{csvpath-0.0.463 → csvpath-0.0.465}/LICENSE RENAMED Viewed

File without changes

{csvpath-0.0.463 → csvpath-0.0.465}/README.md RENAMED Viewed

@@ -3,11 +3,6 @@
 CsvPath defines a declarative syntax for inspecting and validating CSV files.
-Though much simpler, it is inspired by:
-- XPath. CsvPath is to CSV files like XPath is to XML files.
-- Validation of XML using <a href='https://schematron.com/'>Schematron rules</a>
-- The way CSS selectors pick out HTML structures
 CsvPath' goal is to make it easy to:
 - Analyze the content and structure of a CSV
 - Validate that the file matches expectations
@@ -16,6 +11,11 @@ CsvPath' goal is to make it easy to:
 And do it all in an automation-friendly way.
+Though much simpler, it is inspired by:
+- XPath. CsvPath is to CSV files like XPath is to XML files.
+- Validation of XML using <a href='https://schematron.com/'>Schematron rules</a>
+- The way CSS selectors pick out HTML structures
 CsvPath is intended to fit with other DataOps and data quality tools. Files are streamed. The interface is simple. New functions are easy to create.
 Read more about CsvPath and see realistic CSV validation examples at <a href='https://www.csvpath.org'>csvpath.org</a>.