PyPI - csvpath - Versions diffs - 0.0.2__py3-none-any.whl - Mend

csvpath 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54) hide show

csvpath/__init__.py +1 -0
csvpath/csvpath.py +368 -0
csvpath/matching/__init__.py +1 -0
csvpath/matching/expression_encoder.py +108 -0
csvpath/matching/expression_math.py +123 -0
csvpath/matching/expression_utility.py +29 -0
csvpath/matching/functions/above.py +36 -0
csvpath/matching/functions/add.py +24 -0
csvpath/matching/functions/below.py +36 -0
csvpath/matching/functions/concat.py +25 -0
csvpath/matching/functions/count.py +44 -0
csvpath/matching/functions/count_lines.py +12 -0
csvpath/matching/functions/count_scans.py +13 -0
csvpath/matching/functions/divide.py +30 -0
csvpath/matching/functions/end.py +18 -0
csvpath/matching/functions/every.py +33 -0
csvpath/matching/functions/first.py +46 -0
csvpath/matching/functions/function.py +31 -0
csvpath/matching/functions/function_factory.py +114 -0
csvpath/matching/functions/inf.py +38 -0
csvpath/matching/functions/is_instance.py +95 -0
csvpath/matching/functions/length.py +33 -0
csvpath/matching/functions/lower.py +21 -0
csvpath/matching/functions/minf.py +167 -0
csvpath/matching/functions/multiply.py +27 -0
csvpath/matching/functions/no.py +10 -0
csvpath/matching/functions/notf.py +26 -0
csvpath/matching/functions/now.py +33 -0
csvpath/matching/functions/orf.py +28 -0
csvpath/matching/functions/percent.py +29 -0
csvpath/matching/functions/random.py +33 -0
csvpath/matching/functions/regex.py +38 -0
csvpath/matching/functions/subtract.py +28 -0
csvpath/matching/functions/tally.py +36 -0
csvpath/matching/functions/upper.py +21 -0
csvpath/matching/matcher.py +215 -0
csvpath/matching/matching_lexer.py +66 -0
csvpath/matching/parser.out +1287 -0
csvpath/matching/parsetab.py +1427 -0
csvpath/matching/productions/equality.py +158 -0
csvpath/matching/productions/expression.py +16 -0
csvpath/matching/productions/header.py +30 -0
csvpath/matching/productions/matchable.py +41 -0
csvpath/matching/productions/term.py +11 -0
csvpath/matching/productions/variable.py +15 -0
csvpath/parser_utility.py +39 -0
csvpath/scanning/__init__.py +1 -0
csvpath/scanning/parser.out +1 -0
csvpath/scanning/parsetab.py +231 -0
csvpath/scanning/scanner.py +165 -0
csvpath/scanning/scanning_lexer.py +47 -0
csvpath-0.0.2.dist-info/METADATA +184 -0
csvpath-0.0.2.dist-info/RECORD +54 -0
csvpath-0.0.2.dist-info/WHEEL +4 -0

csvpath/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+

csvpath/csvpath.py ADDED Viewed

@@ -0,0 +1,368 @@
+import csv
+from typing import List, Dict, Any
+from collections.abc import Iterator
+from csvpath.matching.matcher import Matcher
+from csvpath.matching.expression_encoder import ExpressionEncoder
+from csvpath.matching.expression_math import ExpressionMath
+from csvpath.scanning.scanner import Scanner
+class NoFileException(Exception):
+    pass
+class CsvPath:
+    def __init__(
+        self, *, filename=None, delimiter=",", quotechar='"', block_print=True
+    ):
+        self.filename = filename
+        self.scanner = None
+        self.value = None
+        self.scan = None
+        self.match = None
+        self.modify = None
+        self.headers = None
+        self.line_number = 0
+        self.scan_count = 0
+        self.match_count = 0
+        self.variables: Dict[str, Any] = {}
+        self.delimiter = delimiter
+        self.quotechar = quotechar
+        self.block_print = block_print
+        self.total_lines = -1
+        self._verbose = False
+        self._dump_json = False
+        self._do_math = False  # off by default, still experimental
+        self._collect_matchers = False
+        self.matchers = []
+        self.jsons = []
+    def dump_json(self):
+        self._dump_json = not self._dump_json
+    def parse(self, data):
+        self.scanner = Scanner()
+        s, mat, mod = self._find_scan_match_modify(data)
+        self.scan = s
+        self.match = mat
+        self.modify = mod
+        self.scanner.parse(s)
+        self._load_headers()
+        self.get_total_lines()
+        return self.scanner
+    def verbose(self, set_verbose: bool = True) -> None:
+        self._verbose = set_verbose
+    # prints what the user needs to see
+    def verbosity(self, msg: Any) -> None:
+        if self._verbose:
+            print(f"{msg}")
+    # prints what the developer needs to see
+    def print(self, msg: str) -> None:
+        if not self.block_print:
+            print(msg)
+    def _load_headers(self) -> None:
+        with open(self.scanner.filename, "r") as file:
+            reader = csv.reader(
+                file, delimiter=self.delimiter, quotechar=self.quotechar
+            )
+            for row in reader:
+                self.headers = row
+                break
+        hs = self.headers[:]
+        self.headers = []
+        for header in hs:
+            header = header.strip()
+            header = header.replace(";", "")
+            header = header.replace(",", "")
+            header = header.replace("|", "")
+            header = header.replace("\t", "")
+            header = header.replace("`", "")
+            self.headers.append(header)
+            self.verbosity(f"header: {header}")
+    def _find_scan_match_modify(self, data):
+        scan = ""
+        matches = ""
+        modify = ""
+        p = 0
+        for i, c in enumerate(data):
+            if p == 0:
+                scan = scan + c
+            elif p == 1:
+                matches = matches + c
+            else:
+                modify = modify + c
+            if c == "]":
+                p = p + 1
+        scan = scan.strip()
+        scan = scan if len(scan) > 0 else None
+        matches = matches.strip()
+        matches = matches if len(matches) > 0 else None
+        modify = modify.strip()
+        modify = modify if len(modify) > 0 else None
+        self.verbosity(f"scan: {scan}")
+        self.verbosity(f"matches: {matches}")
+        self.verbosity(f"modify: {modify}")
+        return scan, matches, modify
+    def __str__(self):
+        return f"""
+            path: {self.scanner.path}
+            filename: {self.filename}
+            parser: {self.scanner}
+            from_line: {self.scanner.from_line}
+            to_line: {self.scanner.to_line}
+            all_lines: {self.scanner.all_lines}
+            these: {self.scanner.these}
+        """
+    @property
+    def from_line(self):
+        return self.scanner.from_line
+    @property
+    def to_line(self):
+        return self.scanner.to_line
+    @property
+    def all_lines(self):
+        return self.scanner.all_lines
+    @property
+    def path(self):
+        return self.scanner.path
+    @property
+    def these(self):
+        return self.scanner.these
+    @property
+    def filename(self):
+        return self.file_name
+    @filename.setter
+    def filename(self, f):
+        self.file_name = f
+    def collect(self) -> List[List[Any]]:
+        lines = []
+        for _ in self.next():
+            _ = _[:]
+            lines.append(_)
+        return lines
+    def next(self):
+        if self.scanner.filename is None:
+            raise NoFileException("there is no filename")
+        self.verbosity(f"filename: {self.scanner.filename}")
+        total_lines = -1
+        if self._verbose:
+            total_lines = self.get_total_lines()
+            self.verbosity(f"total lines: {total_lines}")
+        with open(self.scanner.filename, "r") as file:
+            reader = csv.reader(
+                file, delimiter=self.delimiter, quotechar=self.quotechar
+            )
+            for line in reader:
+                self.verbosity(f"line number: {self.line_number} of {total_lines}")
+                if self.includes(self.line_number):
+                    self.scan_count = self.scan_count + 1
+                    self.print(f"CsvPath.next: line:{line}")
+                    self.verbosity(f"scan count: {self.scan_count}")
+                    if self.matches(line):
+                        self.match_count = self.match_count + 1
+                        self.verbosity(f"match count: {self.match_count}")
+                        yield line
+                self.line_number = self.line_number + 1
+    def get_total_lines(self) -> int:
+        if self.total_lines == -1:
+            with open(self.scanner.filename, "r") as file:
+                reader = csv.reader(
+                    file, delimiter=self.delimiter, quotechar=self.quotechar
+                )
+                for line in reader:
+                    self.total_lines += 1
+        return self.total_lines
+    def current_line_number(self) -> int:
+        return self.line_number
+    def current_scan_count(self) -> int:
+        return self.scan_count
+    def current_match_count(self) -> int:
+        return self.match_count
+    def do_math(self):
+        self._do_math = not self._do_math
+    def collect_matchers(self):
+        self._collect_matchers = not self._collect_matchers
+    def matches(self, line) -> bool:
+        if not self.match:
+            return True
+        self.print(f"CsvPath.matches: the match path: {self.match}")
+        matcher = Matcher(
+            csvpath=self, data=self.match, line=line, headers=self.headers
+        )
+        if self._do_math:
+            em = ExpressionMath()
+            for e in matcher.expressions:
+                em.do_math(e[0])
+        if self._dump_json:
+            jsonstr = ExpressionEncoder().valued_list_to_json(matcher.expressions)
+            self.jsons.append(jsonstr)
+        matched = matcher.matches()
+        if self._collect_matchers:  # and matched
+            self.matchers.append(matcher)
+        return matched
+    def set_variable(self, name: str, *, value: Any, tracking: Any = None) -> None:
+        if not name:
+            raise Exception("name cannot be None")
+        if name in self.variables:
+            self.print(f"CsvPath.set_variable: existing value: {self.variables[name]}")
+        else:
+            self.print("CsvPath.set_variable: no existing value")
+        if tracking is not None:
+            if name not in self.variables:
+                self.variables[name] = {}
+            instances = self.variables[name]
+            instances[tracking] = value
+        else:
+            self.variables[name] = value
+    def get_variable(
+        self, name: str, *, tracking: Any = None, set_if_none: Any = None
+    ) -> Any:
+        if not name:
+            raise Exception("name cannot be None")
+        thevalue = None
+        if tracking is not None:
+            thedict = None
+            if name in self.variables:
+                thedict = self.variables[name]
+                if not thedict:
+                    thedict = {}
+                    self.variables[name] = thedict
+                    thedict[tracking] = set_if_none
+            else:
+                thedict = {}
+                thedict[tracking] = set_if_none
+                self.variables[name] = thedict
+            thevalue = thedict.get(tracking)
+            if not thevalue and set_if_none is not None:
+                thedict[tracking] = set_if_none
+                thevalue = set_if_none
+        else:
+            if name not in self.variables:
+                self.variables[name] = set_if_none
+            thevalue = self.variables[name]
+        return thevalue
+    def includes(self, line: int) -> bool:
+        from_line = self.scanner.from_line
+        to_line = self.scanner.to_line
+        all_lines = self.scanner.all_lines
+        these = self.scanner.these
+        return self._includes(
+            line, from_line=from_line, to_line=to_line, all_lines=all_lines, these=these
+        )
+    def _includes(
+        self,
+        line: int,
+        *,
+        from_line: int = None,
+        to_line: int = None,
+        all_lines: bool = None,
+        these: List[int] = [],
+    ) -> bool:
+        if line is None:
+            return False
+        if from_line is None and all_lines:
+            return True
+        if from_line is not None and all_lines:
+            return line >= from_line
+        if from_line == line:
+            return True
+        if from_line is not None and to_line is not None and from_line > to_line:
+            return line >= to_line and line <= from_line
+        if from_line is not None and to_line is not None:
+            return line >= from_line and line <= to_line
+        if line in these:
+            return True
+        if to_line is not None:
+            return line < to_line
+        return False
+    def line_numbers(self) -> Iterator[int | str]:
+        these = self.scanner.these
+        from_line = self.scanner.from_line
+        to_line = self.scanner.to_line
+        all_lines = self.scanner.all_lines
+        return self._line_numbers(
+            these=these, from_line=from_line, to_line=to_line, all_lines=all_lines
+        )
+    def _line_numbers(
+        self,
+        *,
+        these: List[int] = [],
+        from_line: int = None,
+        to_line: int = None,
+        all_lines: bool = None,
+    ) -> Iterator[int | str]:
+        if len(these) > 0:
+            for i in these:
+                yield i
+        else:
+            if from_line is not None and to_line is not None and from_line > to_line:
+                for i in range(to_line, from_line + 1):
+                    yield i
+            elif from_line is not None and to_line is not None:
+                for i in range(from_line, to_line + 1):
+                    yield i
+            elif from_line is not None:
+                if all_lines:
+                    yield f"{from_line}..."
+                else:
+                    yield from_line
+            elif to_line is not None:
+                yield f"0..{to_line}"
+    def collect_line_numbers(self) -> List[int | str]:
+        these = self.scanner.these
+        from_line = self.scanner.from_line
+        to_line = self.scanner.to_line
+        all_lines = self.scanner.all_lines
+        return self._collect_line_numbers(
+            these=these, from_line=from_line, to_line=to_line, all_lines=all_lines
+        )
+    def _collect_line_numbers(
+        self,
+        *,
+        these: List[int] = [],
+        from_line: int = None,
+        to_line: int = None,
+        all_lines: bool = None,
+    ) -> List[int | str]:
+        collect = []
+        for i in self._line_numbers(
+            these=these, from_line=from_line, to_line=to_line, all_lines=all_lines
+        ):
+            collect.append(i)
+        return collect

csvpath/matching/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+

csvpath/matching/expression_encoder.py ADDED Viewed

@@ -0,0 +1,108 @@
+from csvpath.matching.productions.expression import Expression
+from csvpath.matching.productions.equality import Equality
+from csvpath.matching.productions.variable import Variable
+from csvpath.matching.productions.header import Header
+from csvpath.matching.productions.term import Term
+from csvpath.matching.functions.function import Function
+from typing import Any, List
+class ExpressionEncoder:
+    def list_to_json(self, alist: List[Any]) -> str:
+        json = "[ "
+        for _ in alist:
+            json = f"{json} {self.to_json(_)} "
+        json = f"{json} ] "
+        return json
+    def simple_list_to_json(self, alist: List[Any]) -> str:
+        json = "[ "
+        for i, _ in enumerate(alist):
+            json = f"{json} {self.to_json(_)} "
+            if i < len(alist) - 1:
+                json = f"{json}, "
+        json = f"{json} ] "
+        return json
+    def valued_list_to_json(self, alist: List[List[Any]]) -> str:
+        json = "[ "
+        for i, _ in enumerate(alist):
+            json = f"{json} {self.to_json(_[0])} "
+            if i < len(alist) - 1:
+                json = f"{json}, "
+        json = f"{json} ] "
+        return json
+    def to_json(self, o):
+        if o is None:
+            return "None"
+        json = ""
+        return self._encode(json, o)
+    def _encode(self, json: str, o) -> str:
+        if isinstance(o, Expression):
+            return self.expression(json, o)
+        elif isinstance(o, Equality):
+            return self.equality(json, o)
+        elif isinstance(o, Function):
+            return self.function(json, o)
+        elif isinstance(o, Header):
+            return self.header(json, o)
+        elif isinstance(o, Variable):
+            return self.variable(json, o)
+        elif isinstance(o, Term):
+            return self.term(json, o)
+        elif o is None:
+            return f'{json} "None" '
+        else:
+            raise Exception(f"what am I {o}")
+    def matchable(self, json: str, m) -> str:
+        json = f'{json} "base_class":"matchable", '
+        json = f'{json} "parent_class":"{m.parent.__class__}", '
+        json = f'{json} "value":"{m.value}", '
+        json = f'{json} "name":"{m.name}", '
+        json = f'{json} "children": [ '
+        for i, _ in enumerate(m.children):
+            json = self._encode(json, _)
+            if i < len(m.children) - 1:
+                json = f"{json}, "
+        json = f"{json} ] "
+        return json
+    def expression(self, json: str, e) -> str:
+        json = f"{json} " + '{ "type":"expression", '
+        json = self.matchable(json, e)
+        json = f"{json} " + "} "
+        return json
+    def equality(self, json: str, e) -> str:
+        json = f"{json} " + '{ "type":"equality", '
+        json = self.matchable(json, e)
+        json = f'{json}, "op":"{e.op}" '
+        json = f"{json} " + "} "
+        return json
+    def function(self, json: str, f) -> str:
+        json = f"{json} " + '{ "type":"function", '
+        json = self.matchable(json, f)
+        json = f"{json} " + "} "
+        return json
+    def header(self, json: str, h) -> str:
+        json = f"{json} " + '{ "type":"header", '
+        json = self.matchable(json, h)
+        json = f"{json} " + "} "
+        return json
+    def variable(self, json: str, v) -> str:
+        json = f"{json} " + '{ "type":"variable", '
+        json = self.matchable(json, v)
+        json = f"{json} " + "} "
+        return json
+    def term(self, json: str, t) -> str:
+        json = f"{json} " + '{ "type":"term", '
+        json = self.matchable(json, t)
+        json = f"{json} " + "} "
+        return json

csvpath/matching/expression_math.py ADDED Viewed

@@ -0,0 +1,123 @@
+from csvpath.matching.productions.equality import Equality
+from csvpath.matching.productions.variable import Variable
+from csvpath.matching.productions.term import Term
+from csvpath.matching.productions.header import Header
+from csvpath.matching.functions.function import Function
+"""
+from csvpath.matching.expression_encoder import ExpressionEncoder
+from csvpath.matching.expression_utility import ExpressionUtility
+"""
+class ExpressionMath:
+    """this code works up to a point. there are limitations in
+    the number of operations and there is no precedence or
+    grouping. you can enable math with the CsvPath.do_math()
+    toggle but unless you know it will solve a specific problem
+    you shouldn't. the grammar needs to be reworked to make
+    arithmetic possible without functions, but it isn't a
+    priority."""
+    # True when o is a Variable, Term, Header or Function instance
+    def is_terminal(self, o):
+        return (
+            isinstance(o, Variable)
+            or isinstance(o, Term)
+            or isinstance(o, Header)
+            or isinstance(o, Function)
+        )
+    # entry point: runs drop_down_pull_up on every direct child of the expression
+    def do_math(self, expression):
+        for i, _ in enumerate(expression.children):
+            self.drop_down_pull_up(expression, i, _)
+    # applies the arithmetic op ("+", "-", "*", "/") to the two operands.
+    # raises Exception when an operand is None or the op is not one of the four
+    def math(self, op, left, right):
+        if left is None or right is None:
+            raise Exception(
+                f"ExpresionMath.math: operands cannot be None: {left}, {right}"
+            )
+        if op == "+":
+            return left + right
+        elif op == "-":
+            return left - right
+        elif op == "*":
+            return left * right
+        elif op == "/":
+            return left / right
+        else:
+            raise Exception(f"op cannot be {op}")
+    #
+    # why is this not combining the last two terms?
+    #
+    # if child is an arithmetic Equality that has a right side, computes its
+    # value, stores it in a new Term and puts that Term at parent.children[i].
+    # returns (term, i) on success and (child, i) unchanged otherwise
+    def combine_terms(self, parent, i, child):
+        if isinstance(child, Equality) and child.op in ["-", "+", "*", "/"]:
+            lv = child.left.to_value()
+            if child.right is not None:
+                rv = child.right.to_value()
+                term = Term(parent.matcher)
+                term.value = self.math(child.op, lv, rv)
+                parent.children[i] = term
+                term.parent = parent
+                return term, i
+            else:
+                print("not combining terms")
+                return child, i
+        else:
+            print("not an equality with math")
+            return child, i
+    # when parent is an arithmetic Equality, child is a terminal and the
+    # parent's left side is itself an Equality: folds child's value into the
+    # right operand of that left Equality, clears the parent's right side and
+    # replaces the parent with its left side in the grandparent's children.
+    # the i argument is not used here
+    def push_down_right_terminal(self, parent, i, child):
+        eq = isinstance(parent, Equality)
+        op = parent.op in ["-", "+", "*", "/"] if eq else None
+        """
+        print(f"@ push_down_right_terminal: child {ExpressionUtility._dotted('', child)}")
+        json2 = ExpressionEncoder().simple_list_to_json([child])
+        print(f"@ push_down_right_terminal: child: {json2}")
+        """
+        if eq and op:
+            second = self.is_terminal(child)
+            if second:
+                third = isinstance(child.parent, Equality) and isinstance(
+                    child.parent.left, Equality
+                )
+                if third:
+                    # move child down to left
+                    term = Term(child.matcher)
+                    try:
+                        term.value = self.math(
+                            parent.op, child.value, child.parent.left.right.value
+                        )
+                        child.parent.left.right = term
+                        term.parent = child.parent.left
+                        # remove child from its original place now that term includes its value
+                        child.parent.right = None
+                        replace_me = child.parent.parent.index_of_child(child.parent)
+                        child.parent.parent.children[replace_me] = child.parent.left
+                    except Exception as ex:
+                        # any failure is only printed; the tree is left as it was at that point
+                        print(f"problems? or maybe just mathed out. ex: {ex}")
+    # depth-first pass: handles child's own children first, then tries to
+    # reduce child within parent
+    def drop_down_pull_up(self, parent, i, child):
+        # work right to left
+        cs = child.children[:]
+        cs.reverse()
+        # NOTE(review): j is the position in the reversed copy, not in
+        # child.children, yet it is passed on as the index that combine_terms
+        # writes to - confirm this is intended
+        for j, _ in enumerate(cs):
+            self.drop_down_pull_up(child, j, _)
+        # if we're a terminal within an equality with math and another terminal
+        # we want to reduce ourselves before anything else. to do that we need to
+        # shortcut to the parent equality
+        choose = isinstance(parent, Equality) and parent.op in ["-", "+", "*", "/"]
+        if choose and child.parent.both_terminal():
+            try:
+                x = child.parent.parent.children.index(parent)
+                child, i = self.combine_terms(parent.parent, x, parent)
+            except Exception:
+                print("no such child anymore. presumably mathed out")
+                pass
+        else:
+            # this method won't combine terms in this equality, but we might
+            # do a pull-up-push-down below. do we need to make this call?
+            child, i = self.combine_terms(parent, i, child)
+        self.push_down_right_terminal(parent, i, child)

csvpath/matching/expression_utility.py ADDED Viewed

@@ -0,0 +1,29 @@
+import hashlib
+class ExpressionUtility:
+    @classmethod
+    def get_id(self, thing):
+        # gets a durable ID so funcs like count() can persist throughout the scan
+        id = str(thing)
+        p = thing.parent
+        while p:
+            id = id + str(p)
+            if p.parent:
+                p = p.parent
+            else:
+                break
+        return hashlib.sha256(id.encode("utf-8")).hexdigest()
+    @classmethod
+    def _dotted(self, s, o):
+        if o is None:
+            return s
+        cs = str(o.__class__)
+        cs = cs[cs.rfind(".") :]
+        c = cs[0 : cs.find("'")]
+        s = f"{c}{s}"
+        try:
+            return self._dotted(s, o.parent)
+        except Exception:
+            return s

csvpath/matching/functions/above.py ADDED Viewed

@@ -0,0 +1,36 @@
+from typing import Any
+from csvpath.matching.functions.function import Function, ChildrenException
+class Above(Function):
+    def to_value(self, *, skip=[]) -> Any:
+        if self in skip:
+            return True
+        if len(self.children) != 1:
+            self.matcher.print(
+                f"Above.to_value: must have 1 equality child: {self.children}"
+            )
+            raise ChildrenException("Above function must have 1 child")
+        if self.children[0].op != ",":
+            raise ChildrenException(
+                f"Above function must have an equality with the ',' operation, not {self.children[0].op}"
+            )
+        thischild = self.children[0].children[0]
+        abovethatchild = self.children[0].children[1]
+        this_is = thischild.to_value(skip=skip)
+        above_that = abovethatchild.to_value(skip=skip)
+        this = -1
+        that = -1
+        try:
+            this = float(this_is)
+            that = float(above_that)
+        except Exception:
+            raise Exception(
+                f"Above.to_value: this: {this}, a {this.__class__}, and {that}, a {that.__class__}"
+            )
+        b = this > that
+        return b
+    def matches(self, *, skip=[]) -> bool:
+        return self.to_value(skip=skip)

csvpath/matching/functions/add.py ADDED Viewed

@@ -0,0 +1,24 @@
+from typing import Any
+from csvpath.matching.functions.function import Function, ChildrenException
+from csvpath.matching.productions.equality import Equality
+class Add(Function):
+    def to_value(self, *, skip=[]) -> Any:
+        if not self.value:
+            if len(self.children) != 1:
+                raise ChildrenException("no children. there must be 1 equality child")
+            child = self.children[0]
+            if not isinstance(child, Equality):
+                raise ChildrenException("must be 1 equality child")
+            siblings = child.commas_to_list()
+            ret = 0
+            for i, sib in enumerate(siblings):
+                v = sib.to_value(skip=skip)
+                ret = v + ret
+            self.value = ret
+        return self.value
+    def matches(self, *, skip=[]) -> bool:
+        return True