kodexa 7.5.514404640805-py3-none-any.whl → 8.0.14958192442-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kodexa/dataclasses/__init__.py +1 -1
- kodexa/model/__init__.py +2 -2
- kodexa/model/objects.py +21 -1
- kodexa/model/utils.py +1 -1
- kodexa/pipeline/pipeline.py +1 -1
- kodexa/platform/client.py +1 -2
- kodexa/platform/kodexa.py +4 -1
- kodexa/platform/manifest.py +447 -0
- kodexa/selectors/__init__.py +1 -1
- kodexa/selectors/ast.py +371 -98
- kodexa/selectors/error.py +29 -0
- kodexa/selectors/kodexa-ast-visitor.py +268 -0
- kodexa/selectors/parser.py +91 -0
- kodexa/selectors/resources/KodexaSelector.interp +99 -0
- kodexa/selectors/resources/KodexaSelector.tokens +56 -0
- kodexa/selectors/resources/KodexaSelectorLexer.interp +119 -0
- kodexa/selectors/resources/KodexaSelectorLexer.py +204 -0
- kodexa/selectors/resources/KodexaSelectorLexer.tokens +56 -0
- kodexa/selectors/resources/KodexaSelectorListener.py +570 -0
- kodexa/selectors/resources/KodexaSelectorParser.py +3246 -0
- kodexa/selectors/resources/KodexaSelectorVisitor.py +323 -0
- kodexa/selectors/visitor.py +265 -0
- kodexa/steps/__init__.py +4 -2
- kodexa/steps/common.py +0 -68
- kodexa/testing/test_utils.py +1 -1
- {kodexa-7.5.514404640805.dist-info → kodexa-8.0.14958192442.dist-info}/METADATA +7 -3
- kodexa-8.0.14958192442.dist-info/RECORD +53 -0
- {kodexa-7.5.514404640805.dist-info → kodexa-8.0.14958192442.dist-info}/WHEEL +1 -1
- kodexa/model/model.py +0 -3259
- kodexa/model/persistence.py +0 -2017
- kodexa/selectors/core.py +0 -124
- kodexa/selectors/lexrules.py +0 -137
- kodexa/selectors/lextab.py +0 -83
- kodexa/selectors/lextab.pyi +0 -1
- kodexa/selectors/parserules.py +0 -414
- kodexa/selectors/parserules.pyi +0 -1
- kodexa/selectors/parsetab.py +0 -4149
- kodexa/selectors/parsetab.pyi +0 -1
- kodexa-7.5.514404640805.dist-info/RECORD +0 -50
- {kodexa-7.5.514404640805.dist-info → kodexa-8.0.14958192442.dist-info}/LICENSE +0 -0
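Taken together, the removals and additions above amount to a rewrite of the selector engine: the PLY-based modules (core.py, lexrules.py, parserules.py and the generated lextab.py/parsetab.py tables with their .pyi stubs) are deleted, and ANTLR4-generated artifacts (KodexaSelectorLexer, KodexaSelectorParser, listener and visitor classes) plus new parser.py and visitor.py modules take their place. Below is a minimal sketch of how such generated classes are usually driven with the antlr4-python3-runtime; the module paths come from the file list above, but the start rule name and the helper function are assumptions, not the package's confirmed API.

# Sketch only: standard ANTLR4 Python runtime flow; "selector" is a hypothetical
# start rule name and parse_selector() a hypothetical helper, not taken from this diff.
from antlr4 import CommonTokenStream, InputStream

from kodexa.selectors.resources.KodexaSelectorLexer import KodexaSelectorLexer
from kodexa.selectors.resources.KodexaSelectorParser import KodexaSelectorParser


def parse_selector(expression: str):
    lexer = KodexaSelectorLexer(InputStream(expression))
    parser = KodexaSelectorParser(CommonTokenStream(lexer))
    return parser.selector()  # hypothetical start rule; the real grammar may differ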
kodexa/selectors/core.py
DELETED
@@ -1,124 +0,0 @@
"""Core XPath parsing glue.

"""

from __future__ import unicode_literals

import os
import re
import tempfile

from ply import lex, yacc

from kodexa.selectors import lexrules
from kodexa.selectors import parserules

__all__ = ["lexer", "parser", "parse"]

OPERATOR_FORCERS = {
    "PIPELINE_OP",
    "ABBREV_AXIS_AT",
    "AXIS_SEP",
    "OPEN_PAREN",
    "OPEN_BRACKET",
    "AND_OP",
    "OR_OP",
    "MOD_OP",
    "DIV_OP",
    "MULT_OP",
    "PATH_SEP",
    "ABBREV_PATH_SEP",
    "UNION_OP",
    "PLUS_OP",
    "MINUS_OP",
    "EQUAL_OP",
    "REL_OP",
    "COLON",
}

NODE_TYPES = {"comment", "text", "processing-instruction", "node"}


class LexerWrapper(lex.Lexer):
    def token(self):
        tok = lex.Lexer.token(self)
        if tok is not None:
            if tok.type == "STAR_OP":
                if self.last is not None and self.last.type not in OPERATOR_FORCERS:
                    # first half of point 1
                    tok.type = "MULT_OP"

            if tok.type == "NCNAME":
                if self.last is not None and self.last.type not in OPERATOR_FORCERS:
                    # second half of point 1
                    operator = lexrules.operator_names.get(tok.value, None)
                    if operator is not None:
                        tok.type = operator
                else:
                    self_next = self.peek()
                    if self_next is not None:
                        if self_next.type == "OPEN_PAREN":
                            # point 2
                            if tok.value in NODE_TYPES:
                                tok.type = "NODETYPE"
                            else:
                                tok.type = "FUNCNAME"
                        elif self_next.type == "AXIS_SEP":
                            # point 3
                            tok.type = "AXISNAME"

        self.last = tok
        return tok

    def peek(self):
        clone = self.clone()
        return clone.token()


# try to build the lexer with cached lex table generation. this will fail if
# the user doesn't have write perms on the source directory. in that case,
# try again without lex table generation.
lexdir = os.path.dirname(lexrules.__file__)
lexer = None
try:
    lexer = lex.lex(module=lexrules, optimize=1, outputdir=lexdir, reflags=re.UNICODE)
except IOError as e:
    import errno

    if e.errno != errno.EACCES:
        raise
if lexer is None:
    lexer = lex.lex(module=lexrules, reflags=re.UNICODE)
# then dynamically rewrite the lexer class to use the wonky override logic
# above
lexer.__class__ = LexerWrapper
lexer.last = None

# build the parser. This will generate a parsetab.py in the eulxml.xpath
# directory. Unlike lex, though, this just logs a complaint when it fails
# (contrast lex's explosion). Other than that, it's much less exciting
# than the lexer wackiness.
parsedir = os.path.dirname(parserules.__file__)
# By default, store generated parse files with the code
# If we don't have write permission, put them in the configured tempdir
if not os.access(parsedir, os.W_OK):
    parsedir = tempfile.gettempdir()
parser = yacc.yacc(module=parserules, outputdir=parsedir, debug=0)


def parse(xpath):
    """Parse an xpath."""
    # Expose the parse method of the constructed parser,
    # but explicitly specify the lexer created here,
    # since otherwise parse will use the most-recently created lexer.
    return parser.parse(xpath, lexer=lexer)


def ptokens(s):
    """Lex a string as XPath tokens, and print each token as it is lexed.
    This is used primarily for debugging. You probably don't want this
    function."""

    lexer.input(s)
    for tok in lexer:
        print(tok)
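The module above was the public face of the old PLY engine: parse() pushed a selector through the generated parser, ptokens() was a lexing debug aid, and LexerWrapper resolved the grammar's ambiguities by retagging tokens on the fly (a "*" becomes MULT_OP rather than STAR_OP after a non-operator token, and an NCNAME becomes an operator name, FUNCNAME, NODETYPE or AXISNAME depending on the surrounding tokens). A short usage sketch against that removed API, with an illustrative selector expression:

# How the deleted module was typically called; the expression is only an example.
from kodexa.selectors.core import parse, ptokens

tree = parse("//div[@class = 'header']")  # build the AST via the PLY parser
ptokens("//div[@class = 'header']")       # debug helper: print each lexed token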
kodexa/selectors/lexrules.py
DELETED
@@ -1,137 +0,0 @@
"""XPath lexing rules.

To understand how this module works, it is valuable to have a strong
understanding of the `ply <http://www.dabeaz.com/ply/>` module.
"""

from __future__ import unicode_literals

operator_names = {
    "or": "OR_OP",
    "and": "AND_OP",
    "div": "DIV_OP",
    "mod": "MOD_OP",
    "intersect": "INTERSECT_OP",
    "stream": "PIPELINE_OP",
}

tokens = [
    "PATH_SEP",
    "ABBREV_PATH_SEP",
    "ABBREV_STEP_SELF",
    "ABBREV_STEP_PARENT",
    "AXIS_SEP",
    "ABBREV_AXIS_AT",
    "OPEN_PAREN",
    "CLOSE_PAREN",
    "OPEN_BRACKET",
    "CLOSE_BRACKET",
    "UNION_OP",
    "EQUAL_OP",
    "REL_OP",
    "INTERSECT_OP",
    "PLUS_OP",
    "MINUS_OP",
    "MULT_OP",
    "STAR_OP",
    "COMMA",
    "LITERAL",
    "FLOAT",
    "INTEGER",
    "NCNAME",
    "NODETYPE",
    "FUNCNAME",
    "AXISNAME",
    "COLON",
    "DOLLAR",
] + list(operator_names.values())

t_PATH_SEP = r"/"
t_ABBREV_PATH_SEP = r"//"
t_ABBREV_STEP_SELF = r"\."
t_ABBREV_STEP_PARENT = r"\.\."
t_AXIS_SEP = r"::"
t_ABBREV_AXIS_AT = r"@"
t_OPEN_PAREN = r"\("
t_CLOSE_PAREN = r"\)"
t_OPEN_BRACKET = r"\["
t_CLOSE_BRACKET = r"\]"
t_UNION_OP = r"\|"
t_EQUAL_OP = r"!?="
t_REL_OP = r"[<>]=?"
t_PLUS_OP = r"\+"
t_MINUS_OP = r"-"
t_COMMA = r","
t_COLON = r":"
t_DOLLAR = r"\$"
t_STAR_OP = r"\*"

t_ignore = " \t\r\n"

# NOTE: some versions of python cannot compile regular expressions that
# contain unicode characters above U+FFFF, which are allowable in NCNames.
# These characters can be used in Python 2.6.4, but can NOT be used in 2.6.2
# (status in 2.6.3 is unknown). The code below accounts for that and excludes
# the higher character range if Python can't handle it.

# Monster regex derived from:
# http://www.w3.org/TR/REC-xml/#NT-NameStartChar
# http://www.w3.org/TR/REC-xml/#NT-NameChar
# EXCEPT:
# Technically those productions allow ':'. NCName, on the other hand:
# http://www.w3.org/TR/REC-xml-names/#NT-NCName
# explicitly excludes those names that have ':'. We implement this by
# simply removing ':' from our regexes.

# NameStartChar regex without characters about U+FFFF
NameStartChar = (
    r"[A-Z]|_|[a-z]|\xc0-\xd6]|[\xd8-\xf6]|[\xf8-\u02ff]|"
    + r"[\u0370-\u037d]|[\u037f-\u1fff]|[\u200c-\u200d]|[\u2070-\u218f]|"
    + r"[\u2c00-\u2fef]|[\u3001-\uD7FF]|[\uF900-\uFDCF]|[\uFDF0-\uFFFD]"
)
# complete NameStartChar regex
Full_NameStartChar = r"(" + NameStartChar + r"|[\U00010000-\U000EFFFF]" + r")"
# additional characters allowed in NCNames after the first character
NameChar_extras = r"[-.0-9\xb7\u0300-\u036f\u203f-\u2040]"

try:
    import re

    # test whether or not re can compile unicode characters above U+FFFF
    re.compile(r"[\U00010000-\U00010001]")
    # if that worked, then use the full ncname regex
    NameStartChar = Full_NameStartChar
except:
    # if compilation failed, leave NameStartChar regex as is, which does not
    # include the unicode character ranges above U+FFFF
    pass

NCNAME_REGEX = (
    r"(" + NameStartChar + r")(" + NameStartChar + r"|" + NameChar_extras + r")*"
)

NODE_TYPES = set(["comment", "text", "processing-instruction", "node"])

t_NCNAME = NCNAME_REGEX


def t_LITERAL(t):
    r""" "[^"]*"|'[^']*'"""
    t.value = t.value[1:-1]
    return t


def t_FLOAT(t):
    r"\d+\.\d*|\.\d+"
    t.value = float(t.value)
    return t


def t_INTEGER(t):
    r"\d+"
    t.value = int(t.value)
    return t


def t_error(t):
    raise TypeError("Unknown text '%s'" % (t.value,))
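lexrules.py carried no behaviour of its own: it declared the token names and regular expressions that PLY compiled into a scanner through lex.lex(module=lexrules, ...), exactly as the deleted core.py above did. A standalone sketch of tokenizing with the rules module alone follows (without core.py's LexerWrapper, NCNames are not retagged as FUNCNAME, AXISNAME or NODETYPE); the input string is only an example:

# Sketch: run the deleted PLY rules module directly as a lexer.
import re

from ply import lex

from kodexa.selectors import lexrules

lexer = lex.lex(module=lexrules, reflags=re.UNICODE)
lexer.input("//div[@class = 'header']")
for tok in lexer:
    print(tok.type, tok.value)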
kodexa/selectors/lextab.py
DELETED
@@ -1,83 +0,0 @@
# lextab.py. This file automatically created by PLY (version 3.11). Don't edit!
_tabversion = "3.10"
_lextokens = set(
    (
        "ABBREV_AXIS_AT",
        "ABBREV_PATH_SEP",
        "ABBREV_STEP_PARENT",
        "ABBREV_STEP_SELF",
        "AND_OP",
        "AXISNAME",
        "AXIS_SEP",
        "CLOSE_BRACKET",
        "CLOSE_PAREN",
        "COLON",
        "COMMA",
        "DIV_OP",
        "DOLLAR",
        "EQUAL_OP",
        "FLOAT",
        "FUNCNAME",
        "INTEGER",
        "INTERSECT_OP",
        "LITERAL",
        "MINUS_OP",
        "MOD_OP",
        "MULT_OP",
        "NCNAME",
        "NODETYPE",
        "OPEN_BRACKET",
        "OPEN_PAREN",
        "OR_OP",
        "PATH_SEP",
        "PIPELINE_OP",
        "PLUS_OP",
        "REL_OP",
        "STAR_OP",
        "UNION_OP",
    )
)
_lexreflags = 32
_lexliterals = ""
_lexstateinfo = {"INITIAL": "inclusive"}
_lexstatere = {
    "INITIAL": [
        (
            "(?P<t_LITERAL>\"[^\"]*\"|'[^']*')|(?P<t_FLOAT>\\d+\\.\\d*|\\.\\d+)|(?P<t_INTEGER>\\d+)|(?P<t_NCNAME>(([A-Z]|_|[a-z]|\\xc0-\\xd6]|[\\xd8-\\xf6]|[\\xf8-\\u02ff]|[\\u0370-\\u037d]|[\\u037f-\\u1fff]|[\\u200c-\\u200d]|[\\u2070-\\u218f]|[\\u2c00-\\u2fef]|[\\u3001-\\uD7FF]|[\\uF900-\\uFDCF]|[\\uFDF0-\\uFFFD]|[\\U00010000-\\U000EFFFF]))(([A-Z]|_|[a-z]|\\xc0-\\xd6]|[\\xd8-\\xf6]|[\\xf8-\\u02ff]|[\\u0370-\\u037d]|[\\u037f-\\u1fff]|[\\u200c-\\u200d]|[\\u2070-\\u218f]|[\\u2c00-\\u2fef]|[\\u3001-\\uD7FF]|[\\uF900-\\uFDCF]|[\\uFDF0-\\uFFFD]|[\\U00010000-\\U000EFFFF])|[-.0-9\\xb7\\u0300-\\u036f\\u203f-\\u2040])*)|(?P<t_REL_OP>[<>]=?)|(?P<t_ABBREV_STEP_PARENT>\\.\\.)|(?P<t_EQUAL_OP>!?=)|(?P<t_ABBREV_PATH_SEP>//)|(?P<t_ABBREV_STEP_SELF>\\.)|(?P<t_AXIS_SEP>::)|(?P<t_CLOSE_BRACKET>\\])|(?P<t_CLOSE_PAREN>\\))|(?P<t_DOLLAR>\\$)|(?P<t_OPEN_BRACKET>\\[)|(?P<t_OPEN_PAREN>\\()|(?P<t_PIPELINE_OP>::)|(?P<t_PLUS_OP>\\+)|(?P<t_STAR_OP>\\*)|(?P<t_UNION_OP>\\|)|(?P<t_ABBREV_AXIS_AT>@)|(?P<t_COLON>:)|(?P<t_COMMA>,)|(?P<t_MINUS_OP>-)|(?P<t_PATH_SEP>/)",
            [
                None,
                ("t_LITERAL", "LITERAL"),
                ("t_FLOAT", "FLOAT"),
                ("t_INTEGER", "INTEGER"),
                (None, "NCNAME"),
                None,
                None,
                None,
                None,
                (None, "REL_OP"),
                (None, "ABBREV_STEP_PARENT"),
                (None, "EQUAL_OP"),
                (None, "ABBREV_PATH_SEP"),
                (None, "ABBREV_STEP_SELF"),
                (None, "AXIS_SEP"),
                (None, "CLOSE_BRACKET"),
                (None, "CLOSE_PAREN"),
                (None, "DOLLAR"),
                (None, "OPEN_BRACKET"),
                (None, "OPEN_PAREN"),
                (None, "PIPELINE_OP"),
                (None, "PLUS_OP"),
                (None, "STAR_OP"),
                (None, "UNION_OP"),
                (None, "ABBREV_AXIS_AT"),
                (None, "COLON"),
                (None, "COMMA"),
                (None, "MINUS_OP"),
                (None, "PATH_SEP"),
            ],
        )
    ]
}
_lexstateignore = {"INITIAL": " \t\r\n"}
_lexstateerrorf = {"INITIAL": "t_error"}
_lexstateeoff = {}
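lextab.py was never written by hand: PLY emits it as a cached, pre-compiled token table when lex.lex() is called with optimize=1, which is what the deleted core.py did at import time. A regeneration sketch (the outputdir value here is illustrative):

# Sketch: rebuild the PLY lex table cache from the rules module.
import re

from ply import lex

from kodexa.selectors import lexrules

lex.lex(module=lexrules, optimize=1, outputdir="kodexa/selectors", reflags=re.UNICODE)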
kodexa/selectors/lextab.pyi
DELETED
@@ -1 +0,0 @@
# Empty on purpose to stop mypy analyzing the generated file