kodexa-7.4.414781565138-py3-none-any.whl → kodexa-8.0.14958192442-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. kodexa/dataclasses/__init__.py +1 -1
  2. kodexa/model/__init__.py +2 -2
  3. kodexa/model/utils.py +1 -1
  4. kodexa/pipeline/pipeline.py +1 -1
  5. kodexa/platform/client.py +1 -2
  6. kodexa/selectors/__init__.py +1 -1
  7. kodexa/selectors/ast.py +371 -98
  8. kodexa/selectors/error.py +29 -0
  9. kodexa/selectors/kodexa-ast-visitor.py +268 -0
  10. kodexa/selectors/parser.py +91 -0
  11. kodexa/selectors/resources/KodexaSelector.interp +99 -0
  12. kodexa/selectors/resources/KodexaSelector.tokens +56 -0
  13. kodexa/selectors/resources/KodexaSelectorLexer.interp +119 -0
  14. kodexa/selectors/resources/KodexaSelectorLexer.py +204 -0
  15. kodexa/selectors/resources/KodexaSelectorLexer.tokens +56 -0
  16. kodexa/selectors/resources/KodexaSelectorListener.py +570 -0
  17. kodexa/selectors/resources/KodexaSelectorParser.py +3246 -0
  18. kodexa/selectors/resources/KodexaSelectorVisitor.py +323 -0
  19. kodexa/selectors/visitor.py +265 -0
  20. kodexa/steps/__init__.py +4 -2
  21. kodexa/steps/common.py +0 -68
  22. kodexa/testing/test_utils.py +1 -1
  23. {kodexa-7.4.414781565138.dist-info → kodexa-8.0.14958192442.dist-info}/METADATA +3 -1
  24. kodexa-8.0.14958192442.dist-info/RECORD +53 -0
  25. {kodexa-7.4.414781565138.dist-info → kodexa-8.0.14958192442.dist-info}/WHEEL +1 -1
  26. kodexa/model/model.py +0 -3259
  27. kodexa/model/persistence.py +0 -2017
  28. kodexa/selectors/core.py +0 -124
  29. kodexa/selectors/lexrules.py +0 -137
  30. kodexa/selectors/lextab.py +0 -83
  31. kodexa/selectors/lextab.pyi +0 -1
  32. kodexa/selectors/parserules.py +0 -414
  33. kodexa/selectors/parserules.pyi +0 -1
  34. kodexa/selectors/parsetab.py +0 -90
  35. kodexa/selectors/parsetab.pyi +0 -1
  36. kodexa-7.4.414781565138.dist-info/RECORD +0 -51
  37. {kodexa-7.4.414781565138.dist-info → kodexa-8.0.14958192442.dist-info}/LICENSE +0 -0
kodexa/selectors/core.py DELETED
@@ -1,124 +0,0 @@
- """Core XPath parsing glue.
-
- """
-
- from __future__ import unicode_literals
-
- import os
- import re
- import tempfile
-
- from ply import lex, yacc
-
- from kodexa.selectors import lexrules
- from kodexa.selectors import parserules
-
- __all__ = ["lexer", "parser", "parse"]
-
- OPERATOR_FORCERS = {
-     "PIPELINE_OP",
-     "ABBREV_AXIS_AT",
-     "AXIS_SEP",
-     "OPEN_PAREN",
-     "OPEN_BRACKET",
-     "AND_OP",
-     "OR_OP",
-     "MOD_OP",
-     "DIV_OP",
-     "MULT_OP",
-     "PATH_SEP",
-     "ABBREV_PATH_SEP",
-     "UNION_OP",
-     "PLUS_OP",
-     "MINUS_OP",
-     "EQUAL_OP",
-     "REL_OP",
-     "COLON",
- }
-
- NODE_TYPES = {"comment", "text", "processing-instruction", "node"}
-
-
- class LexerWrapper(lex.Lexer):
-     def token(self):
-         tok = lex.Lexer.token(self)
-         if tok is not None:
-             if tok.type == "STAR_OP":
-                 if self.last is not None and self.last.type not in OPERATOR_FORCERS:
-                     # first half of point 1
-                     tok.type = "MULT_OP"
-
-             if tok.type == "NCNAME":
-                 if self.last is not None and self.last.type not in OPERATOR_FORCERS:
-                     # second half of point 1
-                     operator = lexrules.operator_names.get(tok.value, None)
-                     if operator is not None:
-                         tok.type = operator
-                 else:
-                     self_next = self.peek()
-                     if self_next is not None:
-                         if self_next.type == "OPEN_PAREN":
-                             # point 2
-                             if tok.value in NODE_TYPES:
-                                 tok.type = "NODETYPE"
-                             else:
-                                 tok.type = "FUNCNAME"
-                         elif self_next.type == "AXIS_SEP":
-                             # point 3
-                             tok.type = "AXISNAME"
-
-         self.last = tok
-         return tok
-
-     def peek(self):
-         clone = self.clone()
-         return clone.token()
-
-
- # try to build the lexer with cached lex table generation. this will fail if
- # the user doesn't have write perms on the source directory. in that case,
- # try again without lex table generation.
- lexdir = os.path.dirname(lexrules.__file__)
- lexer = None
- try:
-     lexer = lex.lex(module=lexrules, optimize=1, outputdir=lexdir, reflags=re.UNICODE)
- except IOError as e:
-     import errno
-
-     if e.errno != errno.EACCES:
-         raise
- if lexer is None:
-     lexer = lex.lex(module=lexrules, reflags=re.UNICODE)
- # then dynamically rewrite the lexer class to use the wonky override logic
- # above
- lexer.__class__ = LexerWrapper
- lexer.last = None
-
- # build the parser. This will generate a parsetab.py in the eulxml.xpath
- # directory. Unlike lex, though, this just logs a complaint when it fails
- # (contrast lex's explosion). Other than that, it's much less exciting
- # than the lexer wackiness.
- parsedir = os.path.dirname(parserules.__file__)
- # By default, store generated parse files with the code
- # If we don't have write permission, put them in the configured tempdir
- if not os.access(parsedir, os.W_OK):
-     parsedir = tempfile.gettempdir()
- parser = yacc.yacc(module=parserules, outputdir=parsedir, debug=0)
-
-
- def parse(xpath):
-     """Parse an xpath."""
-     # Expose the parse method of the constructed parser,
-     # but explicitly specify the lexer created here,
-     # since otherwise parse will use the most-recently created lexer.
-     return parser.parse(xpath, lexer=lexer)
-
-
- def ptokens(s):
-     """Lex a string as XPath tokens, and print each token as it is lexed.
-     This is used primarily for debugging. You probably don't want this
-     function."""
-
-     lexer.input(s)
-     for tok in lexer:
-         print(tok)
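
For orientation, the public surface of the removed module was `lexer`, `parser`, and `parse` (plus the debugging helper `ptokens`). A minimal sketch of how this pre-8.0 API was driven; the selector string is illustrative, not taken from the diff:

# Sketch only: exercises the PLY-based entry points removed above.
from kodexa.selectors.core import parse, ptokens

tree = parse("//content_node[. = 'Invoice']")  # illustrative selector string
ptokens("//content_node")                      # prints each lexed token, for debugging

In 8.0 this PLY machinery is replaced by the generated ANTLR lexer/parser/visitor files added under kodexa/selectors/resources.
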
kodexa/selectors/lexrules.py DELETED
@@ -1,137 +0,0 @@
- """XPath lexing rules.
-
- To understand how this module works, it is valuable to have a strong
- understanding of the `ply <http://www.dabeaz.com/ply/>` module.
- """
-
- from __future__ import unicode_literals
-
- operator_names = {
-     "or": "OR_OP",
-     "and": "AND_OP",
-     "div": "DIV_OP",
-     "mod": "MOD_OP",
-     "intersect": "INTERSECT_OP",
-     "stream": "PIPELINE_OP",
- }
-
- tokens = [
-     "PATH_SEP",
-     "ABBREV_PATH_SEP",
-     "ABBREV_STEP_SELF",
-     "ABBREV_STEP_PARENT",
-     "AXIS_SEP",
-     "ABBREV_AXIS_AT",
-     "OPEN_PAREN",
-     "CLOSE_PAREN",
-     "OPEN_BRACKET",
-     "CLOSE_BRACKET",
-     "UNION_OP",
-     "EQUAL_OP",
-     "REL_OP",
-     "INTERSECT_OP",
-     "PLUS_OP",
-     "MINUS_OP",
-     "MULT_OP",
-     "STAR_OP",
-     "COMMA",
-     "LITERAL",
-     "FLOAT",
-     "INTEGER",
-     "NCNAME",
-     "NODETYPE",
-     "FUNCNAME",
-     "AXISNAME",
-     "COLON",
-     "DOLLAR",
- ] + list(operator_names.values())
-
- t_PATH_SEP = r"/"
- t_ABBREV_PATH_SEP = r"//"
- t_ABBREV_STEP_SELF = r"\."
- t_ABBREV_STEP_PARENT = r"\.\."
- t_AXIS_SEP = r"::"
- t_ABBREV_AXIS_AT = r"@"
- t_OPEN_PAREN = r"\("
- t_CLOSE_PAREN = r"\)"
- t_OPEN_BRACKET = r"\["
- t_CLOSE_BRACKET = r"\]"
- t_UNION_OP = r"\|"
- t_EQUAL_OP = r"!?="
- t_REL_OP = r"[<>]=?"
- t_PLUS_OP = r"\+"
- t_MINUS_OP = r"-"
- t_COMMA = r","
- t_COLON = r":"
- t_DOLLAR = r"\$"
- t_STAR_OP = r"\*"
-
- t_ignore = " \t\r\n"
-
- # NOTE: some versions of python cannot compile regular expressions that
- # contain unicode characters above U+FFFF, which are allowable in NCNames.
- # These characters can be used in Python 2.6.4, but can NOT be used in 2.6.2
- # (status in 2.6.3 is unknown). The code below accounts for that and excludes
- # the higher character range if Python can't handle it.
-
- # Monster regex derived from:
- # http://www.w3.org/TR/REC-xml/#NT-NameStartChar
- # http://www.w3.org/TR/REC-xml/#NT-NameChar
- # EXCEPT:
- # Technically those productions allow ':'. NCName, on the other hand:
- # http://www.w3.org/TR/REC-xml-names/#NT-NCName
- # explicitly excludes those names that have ':'. We implement this by
- # simply removing ':' from our regexes.
-
- # NameStartChar regex without characters above U+FFFF
- NameStartChar = (
-     r"[A-Z]|_|[a-z]|\xc0-\xd6]|[\xd8-\xf6]|[\xf8-\u02ff]|"
-     + r"[\u0370-\u037d]|[\u037f-\u1fff]|[\u200c-\u200d]|[\u2070-\u218f]|"
-     + r"[\u2c00-\u2fef]|[\u3001-\uD7FF]|[\uF900-\uFDCF]|[\uFDF0-\uFFFD]"
- )
- # complete NameStartChar regex
- Full_NameStartChar = r"(" + NameStartChar + r"|[\U00010000-\U000EFFFF]" + r")"
- # additional characters allowed in NCNames after the first character
- NameChar_extras = r"[-.0-9\xb7\u0300-\u036f\u203f-\u2040]"
-
- try:
-     import re
-
-     # test whether or not re can compile unicode characters above U+FFFF
-     re.compile(r"[\U00010000-\U00010001]")
-     # if that worked, then use the full ncname regex
-     NameStartChar = Full_NameStartChar
- except:
-     # if compilation failed, leave NameStartChar regex as is, which does not
-     # include the unicode character ranges above U+FFFF
-     pass
-
- NCNAME_REGEX = (
-     r"(" + NameStartChar + r")(" + NameStartChar + r"|" + NameChar_extras + r")*"
- )
-
- NODE_TYPES = set(["comment", "text", "processing-instruction", "node"])
-
- t_NCNAME = NCNAME_REGEX
-
-
- def t_LITERAL(t):
-     r""" "[^"]*"|'[^']*'"""
-     t.value = t.value[1:-1]
-     return t
-
-
- def t_FLOAT(t):
-     r"\d+\.\d*|\.\d+"
-     t.value = float(t.value)
-     return t
-
-
- def t_INTEGER(t):
-     r"\d+"
-     t.value = int(t.value)
-     return t
-
-
- def t_error(t):
-     raise TypeError("Unknown text '%s'" % (t.value,))
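
For readers unfamiliar with PLY's conventions, a self-contained sketch (not kodexa code) of the pattern lexrules.py uses: module-level `t_*` strings are token regexes, while function rules use their docstring as the regex and may convert `t.value`, exactly as `t_FLOAT` and `t_LITERAL` do above:

import ply.lex as lex

tokens = ["PATH_SEP", "LITERAL", "FLOAT", "INTEGER"]

t_PATH_SEP = r"/"      # simple token: the string itself is the regex
t_ignore = " \t\r\n"   # characters skipped between tokens

def t_LITERAL(t):
    r""" "[^"]*"|'[^']*'"""
    t.value = t.value[1:-1]  # strip the surrounding quotes, as lexrules did
    return t

def t_FLOAT(t):
    r"\d+\.\d*|\.\d+"
    t.value = float(t.value)
    return t

def t_INTEGER(t):
    r"\d+"
    t.value = int(t.value)
    return t

def t_error(t):
    raise TypeError("Unknown text %r" % (t.value,))

lexer = lex.lex()
lexer.input("/3.14/'hello'")
for tok in lexer:
    print(tok.type, tok.value)  # PATH_SEP, FLOAT, PATH_SEP, LITERAL

PLY tries function rules in definition order, which is why t_FLOAT is defined before t_INTEGER both here and in the deleted file: otherwise "3.14" would first match the integer rule's \d+.
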
kodexa/selectors/lextab.py DELETED
@@ -1,83 +0,0 @@
- # lextab.py. This file automatically created by PLY (version 3.11). Don't edit!
- _tabversion = "3.10"
- _lextokens = set(
-     (
-         "ABBREV_AXIS_AT",
-         "ABBREV_PATH_SEP",
-         "ABBREV_STEP_PARENT",
-         "ABBREV_STEP_SELF",
-         "AND_OP",
-         "AXISNAME",
-         "AXIS_SEP",
-         "CLOSE_BRACKET",
-         "CLOSE_PAREN",
-         "COLON",
-         "COMMA",
-         "DIV_OP",
-         "DOLLAR",
-         "EQUAL_OP",
-         "FLOAT",
-         "FUNCNAME",
-         "INTEGER",
-         "INTERSECT_OP",
-         "LITERAL",
-         "MINUS_OP",
-         "MOD_OP",
-         "MULT_OP",
-         "NCNAME",
-         "NODETYPE",
-         "OPEN_BRACKET",
-         "OPEN_PAREN",
-         "OR_OP",
-         "PATH_SEP",
-         "PIPELINE_OP",
-         "PLUS_OP",
-         "REL_OP",
-         "STAR_OP",
-         "UNION_OP",
-     )
- )
- _lexreflags = 32
- _lexliterals = ""
- _lexstateinfo = {"INITIAL": "inclusive"}
- _lexstatere = {
-     "INITIAL": [
-         (
-             "(?P<t_LITERAL>\"[^\"]*\"|'[^']*')|(?P<t_FLOAT>\\d+\\.\\d*|\\.\\d+)|(?P<t_INTEGER>\\d+)|(?P<t_NCNAME>(([A-Z]|_|[a-z]|\\xc0-\\xd6]|[\\xd8-\\xf6]|[\\xf8-\\u02ff]|[\\u0370-\\u037d]|[\\u037f-\\u1fff]|[\\u200c-\\u200d]|[\\u2070-\\u218f]|[\\u2c00-\\u2fef]|[\\u3001-\\uD7FF]|[\\uF900-\\uFDCF]|[\\uFDF0-\\uFFFD]|[\\U00010000-\\U000EFFFF]))(([A-Z]|_|[a-z]|\\xc0-\\xd6]|[\\xd8-\\xf6]|[\\xf8-\\u02ff]|[\\u0370-\\u037d]|[\\u037f-\\u1fff]|[\\u200c-\\u200d]|[\\u2070-\\u218f]|[\\u2c00-\\u2fef]|[\\u3001-\\uD7FF]|[\\uF900-\\uFDCF]|[\\uFDF0-\\uFFFD]|[\\U00010000-\\U000EFFFF])|[-.0-9\\xb7\\u0300-\\u036f\\u203f-\\u2040])*)|(?P<t_REL_OP>[<>]=?)|(?P<t_ABBREV_STEP_PARENT>\\.\\.)|(?P<t_EQUAL_OP>!?=)|(?P<t_ABBREV_PATH_SEP>//)|(?P<t_ABBREV_STEP_SELF>\\.)|(?P<t_AXIS_SEP>::)|(?P<t_CLOSE_BRACKET>\\])|(?P<t_CLOSE_PAREN>\\))|(?P<t_DOLLAR>\\$)|(?P<t_OPEN_BRACKET>\\[)|(?P<t_OPEN_PAREN>\\()|(?P<t_PIPELINE_OP>::)|(?P<t_PLUS_OP>\\+)|(?P<t_STAR_OP>\\*)|(?P<t_UNION_OP>\\|)|(?P<t_ABBREV_AXIS_AT>@)|(?P<t_COLON>:)|(?P<t_COMMA>,)|(?P<t_MINUS_OP>-)|(?P<t_PATH_SEP>/)",
-             [
-                 None,
-                 ("t_LITERAL", "LITERAL"),
-                 ("t_FLOAT", "FLOAT"),
-                 ("t_INTEGER", "INTEGER"),
-                 (None, "NCNAME"),
-                 None,
-                 None,
-                 None,
-                 None,
-                 (None, "REL_OP"),
-                 (None, "ABBREV_STEP_PARENT"),
-                 (None, "EQUAL_OP"),
-                 (None, "ABBREV_PATH_SEP"),
-                 (None, "ABBREV_STEP_SELF"),
-                 (None, "AXIS_SEP"),
-                 (None, "CLOSE_BRACKET"),
-                 (None, "CLOSE_PAREN"),
-                 (None, "DOLLAR"),
-                 (None, "OPEN_BRACKET"),
-                 (None, "OPEN_PAREN"),
-                 (None, "PIPELINE_OP"),
-                 (None, "PLUS_OP"),
-                 (None, "STAR_OP"),
-                 (None, "UNION_OP"),
-                 (None, "ABBREV_AXIS_AT"),
-                 (None, "COLON"),
-                 (None, "COMMA"),
-                 (None, "MINUS_OP"),
-                 (None, "PATH_SEP"),
-             ],
-         )
-     ]
- }
- _lexstateignore = {"INITIAL": " \t\r\n"}
- _lexstateerrorf = {"INITIAL": "t_error"}
- _lexstateeoff = {}
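
lextab.py is not hand-written: it is the cache PLY emits when lex.lex() is called with optimize=1, which is what core.py did at import time. A sketch of the call that produced it, assuming the old module layout:

import os
import re
from ply import lex
from kodexa.selectors import lexrules  # module removed in 8.0; shown for illustration

# Writes lextab.py alongside lexrules.py when that directory is writable.
lex.lex(module=lexrules, optimize=1,
        outputdir=os.path.dirname(lexrules.__file__), reflags=re.UNICODE)
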
kodexa/selectors/lextab.pyi DELETED
@@ -1 +0,0 @@
- # Empty on purpose to stop mypy analyzing the generated file