lexcql-parser 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lexcql_parser-1.0.0/PKG-INFO +167 -0
- lexcql_parser-1.0.0/README.md +127 -0
- lexcql_parser-1.0.0/pyproject.toml +88 -0
- lexcql_parser-1.0.0/src/lexcql/LexLexer.g4 +39 -0
- lexcql_parser-1.0.0/src/lexcql/LexLexer.interp +68 -0
- lexcql_parser-1.0.0/src/lexcql/LexLexer.py +98 -0
- lexcql_parser-1.0.0/src/lexcql/LexLexer.tokens +28 -0
- lexcql_parser-1.0.0/src/lexcql/LexParser.g4 +79 -0
- lexcql_parser-1.0.0/src/lexcql/LexParser.interp +65 -0
- lexcql_parser-1.0.0/src/lexcql/LexParser.py +1216 -0
- lexcql_parser-1.0.0/src/lexcql/LexParser.tokens +28 -0
- lexcql_parser-1.0.0/src/lexcql/LexParserListener.py +192 -0
- lexcql_parser-1.0.0/src/lexcql/__init__.py +71 -0
- lexcql_parser-1.0.0/src/lexcql/parser.py +942 -0
- lexcql_parser-1.0.0/src/lexcql/py.typed +0 -0
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: lexcql-parser
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: LexCQL Query Grammar and Parser
|
|
5
|
+
Keywords: LexCQL,FCS,CQL,Query Parser
|
|
6
|
+
Author: Erik Körner
|
|
7
|
+
Author-email: Erik Körner <koerner@saw-leipzig.de>
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Intended Audience :: Science/Research
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Text Processing :: General
|
|
21
|
+
Classifier: Topic :: Utilities
|
|
22
|
+
Requires-Dist: antlr4-python3-runtime>=4.13.2
|
|
23
|
+
Requires-Dist: antlr4-tools>=0.2.2 ; extra == 'antlr'
|
|
24
|
+
Requires-Dist: twine>=6.2.0 ; extra == 'build'
|
|
25
|
+
Requires-Dist: black>=26.1.0 ; extra == 'style'
|
|
26
|
+
Requires-Dist: flake8>=7.3.0 ; extra == 'style'
|
|
27
|
+
Requires-Dist: isort>=8.0.0 ; extra == 'style'
|
|
28
|
+
Requires-Dist: pytest>=9.0.2 ; extra == 'test'
|
|
29
|
+
Requires-Dist: pytest-clarity>=1.0.1 ; extra == 'test'
|
|
30
|
+
Requires-Dist: pytest-cov>=7.0.0 ; extra == 'test'
|
|
31
|
+
Requires-Dist: pytest-randomly>=4.0.1 ; extra == 'test'
|
|
32
|
+
Requires-Python: >=3.10
|
|
33
|
+
Project-URL: Source, https://github.com/Querela/lexcql-python.git
|
|
34
|
+
Project-URL: Issues, https://github.com/Querela/lexcql-python/issues
|
|
35
|
+
Provides-Extra: antlr
|
|
36
|
+
Provides-Extra: build
|
|
37
|
+
Provides-Extra: style
|
|
38
|
+
Provides-Extra: test
|
|
39
|
+
Description-Content-Type: text/markdown
|
|
40
|
+
|
|
41
|
+
# LexCQL for Python
|
|
42
|
+
|
|
43
|
+
A query parser for LexCQL, the query language for lexical resources in the CLARIN Federated Content Search (FCS).
|
|
44
|
+
|
|
45
|
+
## Installation
|
|
46
|
+
|
|
47
|
+
Install from PyPI:
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
pip install lexcql-parser
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Or install from source:
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
git clone https://github.com/Querela/lexcql-python.git
|
|
57
|
+
cd lexcql-python
|
|
58
|
+
uv build
|
|
59
|
+
|
|
60
|
+
# built package
|
|
61
|
+
python3 -m pip install dist/lexcql_parser-<version>-py3-none-any.whl
|
|
62
|
+
# or
|
|
63
|
+
python3 -m pip install dist/lexcql_parser-<version>.tar.gz
|
|
64
|
+
|
|
65
|
+
# for local development
|
|
66
|
+
python3 -m pip install -e .
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Usage
|
|
70
|
+
|
|
71
|
+
The high-level interface `lexcql.parser.QueryParser` wraps the ANTLR4 parse tree into a simplified query node tree that is easier to work with. The `lexcql-parser` package exposes a simple parsing function, `lexcql.parse(input: str, enableSourceLocations: bool = True) -> lexcql.parser.QueryNode`:
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
import lexcql
|
|
75
|
+
|
|
76
|
+
## parsing a valid query into a query node tree
|
|
77
|
+
# our query input string
|
|
78
|
+
input = "Banane Or lemma =/lang=eng apple"
|
|
79
|
+
# parse into QueryNode tree
|
|
80
|
+
sc = lexcql.parse(input)
|
|
81
|
+
# print stringified tree
|
|
82
|
+
print(str(sc))
|
|
83
|
+
|
|
84
|
+
## handling possibly invalid queries
|
|
85
|
+
input = "broken query"
|
|
86
|
+
try:
|
|
87
|
+
lexcql.parse(input)
|
|
88
|
+
except lexcql.QueryParserException as ex:
|
|
89
|
+
print(f"Error: {ex}")
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
You can also use the more low-level ANTLR4 framework to parse the query string. A handy wrapper is provided with `lexcql.antlr_parse(input: str) -> LexParser.QueryContext`.
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
from antlr4 import CommonTokenStream, InputStream
|
|
96
|
+
from lexcql.parser import LexLexer, LexParser
|
|
97
|
+
|
|
98
|
+
input = "example"
|
|
99
|
+
input_stream = InputStream(input)
|
|
100
|
+
lexer = LexLexer(input_stream)
|
|
101
|
+
stream = CommonTokenStream(lexer)
|
|
102
|
+
parser = LexParser(stream)
|
|
103
|
+
tree: LexParser.QueryContext = parser.query()
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## Development
|
|
107
|
+
|
|
108
|
+
Fetch (or update) grammar files:
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
git clone https://github.com/clarin-eric/fcs-ql.git
|
|
112
|
+
cp fcs-ql/src/main/antlr4/eu/clarin/sru/fcs/qlparser/lex/*.g4 src/lexcql/
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
(Re-)Generate python parser code:
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
# setup environment
|
|
119
|
+
uv sync --extra antlr
|
|
120
|
+
# NOTE: you can activate the environment (if you do not want to prefix everything with `uv run`)
|
|
121
|
+
# NOTE: `uv` does not play nicely with `pyenv` - if you use `pyenv`, sourcing does NOT work!
|
|
122
|
+
source .venv/bin/activate
|
|
123
|
+
|
|
124
|
+
cd src/lexcql
|
|
125
|
+
uv run antlr4 -Dlanguage=Python3 *.g4
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
Run style checks:
|
|
129
|
+
|
|
130
|
+
```bash
|
|
131
|
+
# setup environment
|
|
132
|
+
uv sync --extra style
|
|
133
|
+
|
|
134
|
+
uv run black --check .
|
|
135
|
+
uv run flake8 . --show-source --statistics
|
|
136
|
+
uv run isort --check --diff .
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
Run tests:
|
|
140
|
+
|
|
141
|
+
```bash
|
|
142
|
+
# setup environment
|
|
143
|
+
uv sync --extra test
|
|
144
|
+
|
|
145
|
+
uv run pytest
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
Run checks before publishing:
|
|
149
|
+
|
|
150
|
+
```bash
|
|
151
|
+
# setup environment
|
|
152
|
+
uv sync --extra build
|
|
153
|
+
|
|
154
|
+
# build the package
|
|
155
|
+
uv build
|
|
156
|
+
# run metadata check
|
|
157
|
+
uv run twine check --strict dist/*
|
|
158
|
+
# (manual) check of package contents
|
|
159
|
+
tar tvf dist/lexcql_parser-*.tar.gz
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
## See also
|
|
163
|
+
|
|
164
|
+
- [clarin-eric/fcs-ql](https://github.com/clarin-eric/fcs-ql) - FCS-QL/LexCQL Parser (Java)
|
|
165
|
+
- [Querela/fcs-ql-python](https://github.com/Querela/fcs-ql-python) - FCS-QL Parser (Python)
|
|
166
|
+
- [Specification on CLARIN FCS 2](https://www.clarin.eu/content/federated-content-search-clarin-fcs-technical-details) - CLARIN FCS Overview
|
|
167
|
+
- [Specification on LexFCS](https://doi.org/10.5281/zenodo.7849753) - Published LexFCS Specification
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# LexCQL for Python
|
|
2
|
+
|
|
3
|
+
A query parser for LexCQL, the query language for lexical resources in the CLARIN Federated Content Search (FCS).
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
Install from PyPI:
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install lexcql-parser
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
Or install from source:
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
git clone https://github.com/Querela/lexcql-python.git
|
|
17
|
+
cd lexcql-python
|
|
18
|
+
uv build
|
|
19
|
+
|
|
20
|
+
# built package
|
|
21
|
+
python3 -m pip install dist/lexcql_parser-<version>-py3-none-any.whl
|
|
22
|
+
# or
|
|
23
|
+
python3 -m pip install dist/lexcql_parser-<version>.tar.gz
|
|
24
|
+
|
|
25
|
+
# for local development
|
|
26
|
+
python3 -m pip install -e .
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## Usage
|
|
30
|
+
|
|
31
|
+
The high-level interface `lexcql.parser.QueryParser` wraps the ANTLR4 parse tree into a simplified query node tree that is easier to work with. The `lexcql-parser` package exposes a simple parsing function, `lexcql.parse(input: str, enableSourceLocations: bool = True) -> lexcql.parser.QueryNode`:
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
import lexcql
|
|
35
|
+
|
|
36
|
+
## parsing a valid query into a query node tree
|
|
37
|
+
# our query input string
|
|
38
|
+
input = "Banane Or lemma =/lang=eng apple"
|
|
39
|
+
# parse into QueryNode tree
|
|
40
|
+
sc = lexcql.parse(input)
|
|
41
|
+
# print stringified tree
|
|
42
|
+
print(str(sc))
|
|
43
|
+
|
|
44
|
+
## handling possibly invalid queries
|
|
45
|
+
input = "broken query"
|
|
46
|
+
try:
|
|
47
|
+
lexcql.parse(input)
|
|
48
|
+
except lexcql.QueryParserException as ex:
|
|
49
|
+
print(f"Error: {ex}")
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
You can also use the more low-level ANTLR4 framework to parse the query string. A handy wrapper is provided with `lexcql.antlr_parse(input: str) -> LexParser.QueryContext`.
|
|
53
|
+
|
|
54
|
+
```python
|
|
55
|
+
from antlr4 import CommonTokenStream, InputStream
|
|
56
|
+
from lexcql.parser import LexLexer, LexParser
|
|
57
|
+
|
|
58
|
+
input = "example"
|
|
59
|
+
input_stream = InputStream(input)
|
|
60
|
+
lexer = LexLexer(input_stream)
|
|
61
|
+
stream = CommonTokenStream(lexer)
|
|
62
|
+
parser = LexParser(stream)
|
|
63
|
+
tree: LexParser.QueryContext = parser.query()
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## Development
|
|
67
|
+
|
|
68
|
+
Fetch (or update) grammar files:
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
git clone https://github.com/clarin-eric/fcs-ql.git
|
|
72
|
+
cp fcs-ql/src/main/antlr4/eu/clarin/sru/fcs/qlparser/lex/*.g4 src/lexcql/
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
(Re-)Generate python parser code:
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
# setup environment
|
|
79
|
+
uv sync --extra antlr
|
|
80
|
+
# NOTE: you can activate the environment (if you do not want to prefix everything with `uv run`)
|
|
81
|
+
# NOTE: `uv` does not play nicely with `pyenv` - if you use `pyenv`, sourcing does NOT work!
|
|
82
|
+
source .venv/bin/activate
|
|
83
|
+
|
|
84
|
+
cd src/lexcql
|
|
85
|
+
uv run antlr4 -Dlanguage=Python3 *.g4
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Run style checks:
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
# setup environment
|
|
92
|
+
uv sync --extra style
|
|
93
|
+
|
|
94
|
+
uv run black --check .
|
|
95
|
+
uv run flake8 . --show-source --statistics
|
|
96
|
+
uv run isort --check --diff .
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Run tests:
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
# setup environment
|
|
103
|
+
uv sync --extra test
|
|
104
|
+
|
|
105
|
+
uv run pytest
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
Run checks before publishing:
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
# setup environment
|
|
112
|
+
uv sync --extra build
|
|
113
|
+
|
|
114
|
+
# build the package
|
|
115
|
+
uv build
|
|
116
|
+
# run metadata check
|
|
117
|
+
uv run twine check --strict dist/*
|
|
118
|
+
# (manual) check of package contents
|
|
119
|
+
tar tvf dist/lexcql_parser-*.tar.gz
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## See also
|
|
123
|
+
|
|
124
|
+
- [clarin-eric/fcs-ql](https://github.com/clarin-eric/fcs-ql) - FCS-QL/LexCQL Parser (Java)
|
|
125
|
+
- [Querela/fcs-ql-python](https://github.com/Querela/fcs-ql-python) - FCS-QL Parser (Python)
|
|
126
|
+
- [Specification on CLARIN FCS 2](https://www.clarin.eu/content/federated-content-search-clarin-fcs-technical-details) - CLARIN FCS Overview
|
|
127
|
+
- [Specification on LexFCS](https://doi.org/10.5281/zenodo.7849753) - Published LexFCS Specification
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "lexcql-parser"
|
|
3
|
+
version = "1.0.0"
|
|
4
|
+
description = "LexCQL Query Grammar and Parser"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license = "MIT"
|
|
7
|
+
authors = [
|
|
8
|
+
{ name = "Erik Körner", email = "koerner@saw-leipzig.de" }
|
|
9
|
+
]
|
|
10
|
+
keywords = [
|
|
11
|
+
"LexCQL",
|
|
12
|
+
"FCS",
|
|
13
|
+
"CQL",
|
|
14
|
+
"Query Parser"
|
|
15
|
+
]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 5 - Production/Stable",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"Intended Audience :: Science/Research",
|
|
20
|
+
"License :: OSI Approved :: MIT License",
|
|
21
|
+
"Operating System :: OS Independent",
|
|
22
|
+
"Programming Language :: Python :: 3",
|
|
23
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
24
|
+
"Programming Language :: Python :: 3.10",
|
|
25
|
+
"Programming Language :: Python :: 3.11",
|
|
26
|
+
"Programming Language :: Python :: 3.12",
|
|
27
|
+
"Programming Language :: Python :: 3.13",
|
|
28
|
+
"Topic :: Text Processing :: General",
|
|
29
|
+
"Topic :: Utilities",
|
|
30
|
+
]
|
|
31
|
+
requires-python = ">=3.10"
|
|
32
|
+
dependencies = [
|
|
33
|
+
"antlr4-python3-runtime>=4.13.2",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
[project.urls]
|
|
37
|
+
Source = "https://github.com/Querela/lexcql-python.git"
|
|
38
|
+
Issues = "https://github.com/Querela/lexcql-python/issues"
|
|
39
|
+
|
|
40
|
+
[project.optional-dependencies]
|
|
41
|
+
antlr = [
|
|
42
|
+
"antlr4-tools>=0.2.2",
|
|
43
|
+
]
|
|
44
|
+
test = [
|
|
45
|
+
"pytest>=9.0.2",
|
|
46
|
+
"pytest-clarity>=1.0.1",
|
|
47
|
+
"pytest-cov>=7.0.0",
|
|
48
|
+
"pytest-randomly>=4.0.1",
|
|
49
|
+
]
|
|
50
|
+
style = [
|
|
51
|
+
"black>=26.1.0",
|
|
52
|
+
"flake8>=7.3.0",
|
|
53
|
+
"isort>=8.0.0",
|
|
54
|
+
]
|
|
55
|
+
build = [
|
|
56
|
+
"twine>=6.2.0",
|
|
57
|
+
]
|
|
58
|
+
|
|
59
|
+
[build-system]
|
|
60
|
+
requires = ["uv_build>=0.10.2,<0.11.0"]
|
|
61
|
+
build-backend = "uv_build"
|
|
62
|
+
|
|
63
|
+
[tool.uv.build-backend]
|
|
64
|
+
module-name = "lexcql"
|
|
65
|
+
source-exclude = [".antlr"]
|
|
66
|
+
|
|
67
|
+
[tool.black]
|
|
68
|
+
line-length = 120
|
|
69
|
+
extend-exclude = '''
|
|
70
|
+
(
|
|
71
|
+
(Lex(Lexer|Parser(Listener)?)).py
|
|
72
|
+
)
|
|
73
|
+
'''
|
|
74
|
+
|
|
75
|
+
[tool.isort]
|
|
76
|
+
force_single_line = true
|
|
77
|
+
line_length = 120
|
|
78
|
+
known_first_party = "lexcql"
|
|
79
|
+
default_section = "THIRDPARTY"
|
|
80
|
+
forced_separate = "test_lexcql"
|
|
81
|
+
skip = [
|
|
82
|
+
".venv",
|
|
83
|
+
"dist",
|
|
84
|
+
"src/lexcql/LexParser.py",
|
|
85
|
+
"src/lexcql/LexLexer.py",
|
|
86
|
+
"src/lexcql/LexParserListener.py",
|
|
87
|
+
]
|
|
88
|
+
skip_gitignore = true
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
lexer grammar LexLexer;
|
|
2
|
+
|
|
3
|
+
/* search clauses */
|
|
4
|
+
|
|
5
|
+
L_PAREN: '(';
|
|
6
|
+
R_PAREN: ')';
|
|
7
|
+
|
|
8
|
+
/* relations */
|
|
9
|
+
|
|
10
|
+
EQUAL: '=';
|
|
11
|
+
GREATER: '>';
|
|
12
|
+
LESSER: '<';
|
|
13
|
+
GREATER_EQUAL: '>=';
|
|
14
|
+
LESSER_EQUAL: '<=';
|
|
15
|
+
NOT_EQUAL: '<>';
|
|
16
|
+
EQUAL_EQUAL: '==';
|
|
17
|
+
|
|
18
|
+
/* modifiers */
|
|
19
|
+
|
|
20
|
+
SLASH: '/';
|
|
21
|
+
|
|
22
|
+
/* booleans */
|
|
23
|
+
|
|
24
|
+
AND: [Aa][Nn][Dd];
|
|
25
|
+
OR: [Oo][Rr];
|
|
26
|
+
NOT: [Nn][Oo][Tt];
|
|
27
|
+
|
|
28
|
+
/* terminals */
|
|
29
|
+
|
|
30
|
+
DOT: '.';
|
|
31
|
+
|
|
32
|
+
QUOTED_STRING: '"' ('\\' ["\\] | ~["\\])* '"';
|
|
33
|
+
|
|
34
|
+
// whitespace must be explicit here as inverted sets cannot use rules/fragments
|
|
35
|
+
SIMPLE_STRING: (~["\\()/<=> \t\n\r])+;
|
|
36
|
+
|
|
37
|
+
/* whitespace */
|
|
38
|
+
|
|
39
|
+
WS: [ \t\n\r] -> channel(HIDDEN);
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
token literal names:
|
|
2
|
+
null
|
|
3
|
+
'('
|
|
4
|
+
')'
|
|
5
|
+
'='
|
|
6
|
+
'>'
|
|
7
|
+
'<'
|
|
8
|
+
'>='
|
|
9
|
+
'<='
|
|
10
|
+
'<>'
|
|
11
|
+
'=='
|
|
12
|
+
'/'
|
|
13
|
+
null
|
|
14
|
+
null
|
|
15
|
+
null
|
|
16
|
+
'.'
|
|
17
|
+
null
|
|
18
|
+
null
|
|
19
|
+
null
|
|
20
|
+
|
|
21
|
+
token symbolic names:
|
|
22
|
+
null
|
|
23
|
+
L_PAREN
|
|
24
|
+
R_PAREN
|
|
25
|
+
EQUAL
|
|
26
|
+
GREATER
|
|
27
|
+
LESSER
|
|
28
|
+
GREATER_EQUAL
|
|
29
|
+
LESSER_EQUAL
|
|
30
|
+
NOT_EQUAL
|
|
31
|
+
EQUAL_EQUAL
|
|
32
|
+
SLASH
|
|
33
|
+
AND
|
|
34
|
+
OR
|
|
35
|
+
NOT
|
|
36
|
+
DOT
|
|
37
|
+
QUOTED_STRING
|
|
38
|
+
SIMPLE_STRING
|
|
39
|
+
WS
|
|
40
|
+
|
|
41
|
+
rule names:
|
|
42
|
+
L_PAREN
|
|
43
|
+
R_PAREN
|
|
44
|
+
EQUAL
|
|
45
|
+
GREATER
|
|
46
|
+
LESSER
|
|
47
|
+
GREATER_EQUAL
|
|
48
|
+
LESSER_EQUAL
|
|
49
|
+
NOT_EQUAL
|
|
50
|
+
EQUAL_EQUAL
|
|
51
|
+
SLASH
|
|
52
|
+
AND
|
|
53
|
+
OR
|
|
54
|
+
NOT
|
|
55
|
+
DOT
|
|
56
|
+
QUOTED_STRING
|
|
57
|
+
SIMPLE_STRING
|
|
58
|
+
WS
|
|
59
|
+
|
|
60
|
+
channel names:
|
|
61
|
+
DEFAULT_TOKEN_CHANNEL
|
|
62
|
+
HIDDEN
|
|
63
|
+
|
|
64
|
+
mode names:
|
|
65
|
+
DEFAULT_MODE
|
|
66
|
+
|
|
67
|
+
atn:
|
|
68
|
+
[4, 0, 17, 92, 6, -1, 2, 0, 7, 0, 2, 1, 7, 1, 2, 2, 7, 2, 2, 3, 7, 3, 2, 4, 7, 4, 2, 5, 7, 5, 2, 6, 7, 6, 2, 7, 7, 7, 2, 8, 7, 8, 2, 9, 7, 9, 2, 10, 7, 10, 2, 11, 7, 11, 2, 12, 7, 12, 2, 13, 7, 13, 2, 14, 7, 14, 2, 15, 7, 15, 2, 16, 7, 16, 1, 0, 1, 0, 1, 1, 1, 1, 1, 2, 1, 2, 1, 3, 1, 3, 1, 4, 1, 4, 1, 5, 1, 5, 1, 5, 1, 6, 1, 6, 1, 6, 1, 7, 1, 7, 1, 7, 1, 8, 1, 8, 1, 8, 1, 9, 1, 9, 1, 10, 1, 10, 1, 10, 1, 10, 1, 11, 1, 11, 1, 11, 1, 12, 1, 12, 1, 12, 1, 12, 1, 13, 1, 13, 1, 14, 1, 14, 1, 14, 1, 14, 5, 14, 77, 8, 14, 10, 14, 12, 14, 80, 9, 14, 1, 14, 1, 14, 1, 15, 4, 15, 85, 8, 15, 11, 15, 12, 15, 86, 1, 16, 1, 16, 1, 16, 1, 16, 0, 0, 17, 1, 1, 3, 2, 5, 3, 7, 4, 9, 5, 11, 6, 13, 7, 15, 8, 17, 9, 19, 10, 21, 11, 23, 12, 25, 13, 27, 14, 29, 15, 31, 16, 33, 17, 1, 0, 9, 2, 0, 65, 65, 97, 97, 2, 0, 78, 78, 110, 110, 2, 0, 68, 68, 100, 100, 2, 0, 79, 79, 111, 111, 2, 0, 82, 82, 114, 114, 2, 0, 84, 84, 116, 116, 2, 0, 34, 34, 92, 92, 8, 0, 9, 10, 13, 13, 32, 32, 34, 34, 40, 41, 47, 47, 60, 62, 92, 92, 3, 0, 9, 10, 13, 13, 32, 32, 94, 0, 1, 1, 0, 0, 0, 0, 3, 1, 0, 0, 0, 0, 5, 1, 0, 0, 0, 0, 7, 1, 0, 0, 0, 0, 9, 1, 0, 0, 0, 0, 11, 1, 0, 0, 0, 0, 13, 1, 0, 0, 0, 0, 15, 1, 0, 0, 0, 0, 17, 1, 0, 0, 0, 0, 19, 1, 0, 0, 0, 0, 21, 1, 0, 0, 0, 0, 23, 1, 0, 0, 0, 0, 25, 1, 0, 0, 0, 0, 27, 1, 0, 0, 0, 0, 29, 1, 0, 0, 0, 0, 31, 1, 0, 0, 0, 0, 33, 1, 0, 0, 0, 1, 35, 1, 0, 0, 0, 3, 37, 1, 0, 0, 0, 5, 39, 1, 0, 0, 0, 7, 41, 1, 0, 0, 0, 9, 43, 1, 0, 0, 0, 11, 45, 1, 0, 0, 0, 13, 48, 1, 0, 0, 0, 15, 51, 1, 0, 0, 0, 17, 54, 1, 0, 0, 0, 19, 57, 1, 0, 0, 0, 21, 59, 1, 0, 0, 0, 23, 63, 1, 0, 0, 0, 25, 66, 1, 0, 0, 0, 27, 70, 1, 0, 0, 0, 29, 72, 1, 0, 0, 0, 31, 84, 1, 0, 0, 0, 33, 88, 1, 0, 0, 0, 35, 36, 5, 40, 0, 0, 36, 2, 1, 0, 0, 0, 37, 38, 5, 41, 0, 0, 38, 4, 1, 0, 0, 0, 39, 40, 5, 61, 0, 0, 40, 6, 1, 0, 0, 0, 41, 42, 5, 62, 0, 0, 42, 8, 1, 0, 0, 0, 43, 44, 5, 60, 0, 0, 44, 10, 1, 0, 0, 0, 45, 46, 5, 62, 0, 0, 46, 47, 5, 61, 0, 0, 47, 12, 1, 0, 0, 0, 48, 49, 5, 60, 0, 0, 49, 50, 5, 61, 0, 0, 
50, 14, 1, 0, 0, 0, 51, 52, 5, 60, 0, 0, 52, 53, 5, 62, 0, 0, 53, 16, 1, 0, 0, 0, 54, 55, 5, 61, 0, 0, 55, 56, 5, 61, 0, 0, 56, 18, 1, 0, 0, 0, 57, 58, 5, 47, 0, 0, 58, 20, 1, 0, 0, 0, 59, 60, 7, 0, 0, 0, 60, 61, 7, 1, 0, 0, 61, 62, 7, 2, 0, 0, 62, 22, 1, 0, 0, 0, 63, 64, 7, 3, 0, 0, 64, 65, 7, 4, 0, 0, 65, 24, 1, 0, 0, 0, 66, 67, 7, 1, 0, 0, 67, 68, 7, 3, 0, 0, 68, 69, 7, 5, 0, 0, 69, 26, 1, 0, 0, 0, 70, 71, 5, 46, 0, 0, 71, 28, 1, 0, 0, 0, 72, 78, 5, 34, 0, 0, 73, 74, 5, 92, 0, 0, 74, 77, 7, 6, 0, 0, 75, 77, 8, 6, 0, 0, 76, 73, 1, 0, 0, 0, 76, 75, 1, 0, 0, 0, 77, 80, 1, 0, 0, 0, 78, 76, 1, 0, 0, 0, 78, 79, 1, 0, 0, 0, 79, 81, 1, 0, 0, 0, 80, 78, 1, 0, 0, 0, 81, 82, 5, 34, 0, 0, 82, 30, 1, 0, 0, 0, 83, 85, 8, 7, 0, 0, 84, 83, 1, 0, 0, 0, 85, 86, 1, 0, 0, 0, 86, 84, 1, 0, 0, 0, 86, 87, 1, 0, 0, 0, 87, 32, 1, 0, 0, 0, 88, 89, 7, 8, 0, 0, 89, 90, 1, 0, 0, 0, 90, 91, 6, 16, 0, 0, 91, 34, 1, 0, 0, 0, 4, 0, 76, 78, 86, 1, 0, 1, 0]
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# Generated from LexLexer.g4 by ANTLR 4.13.2
|
|
2
|
+
from antlr4 import *
|
|
3
|
+
from io import StringIO
|
|
4
|
+
import sys
|
|
5
|
+
if sys.version_info[1] > 5:
|
|
6
|
+
from typing import TextIO
|
|
7
|
+
else:
|
|
8
|
+
from typing.io import TextIO
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def serializedATN():
|
|
12
|
+
return [
|
|
13
|
+
4,0,17,92,6,-1,2,0,7,0,2,1,7,1,2,2,7,2,2,3,7,3,2,4,7,4,2,5,7,5,2,
|
|
14
|
+
6,7,6,2,7,7,7,2,8,7,8,2,9,7,9,2,10,7,10,2,11,7,11,2,12,7,12,2,13,
|
|
15
|
+
7,13,2,14,7,14,2,15,7,15,2,16,7,16,1,0,1,0,1,1,1,1,1,2,1,2,1,3,1,
|
|
16
|
+
3,1,4,1,4,1,5,1,5,1,5,1,6,1,6,1,6,1,7,1,7,1,7,1,8,1,8,1,8,1,9,1,
|
|
17
|
+
9,1,10,1,10,1,10,1,10,1,11,1,11,1,11,1,12,1,12,1,12,1,12,1,13,1,
|
|
18
|
+
13,1,14,1,14,1,14,1,14,5,14,77,8,14,10,14,12,14,80,9,14,1,14,1,14,
|
|
19
|
+
1,15,4,15,85,8,15,11,15,12,15,86,1,16,1,16,1,16,1,16,0,0,17,1,1,
|
|
20
|
+
3,2,5,3,7,4,9,5,11,6,13,7,15,8,17,9,19,10,21,11,23,12,25,13,27,14,
|
|
21
|
+
29,15,31,16,33,17,1,0,9,2,0,65,65,97,97,2,0,78,78,110,110,2,0,68,
|
|
22
|
+
68,100,100,2,0,79,79,111,111,2,0,82,82,114,114,2,0,84,84,116,116,
|
|
23
|
+
2,0,34,34,92,92,8,0,9,10,13,13,32,32,34,34,40,41,47,47,60,62,92,
|
|
24
|
+
92,3,0,9,10,13,13,32,32,94,0,1,1,0,0,0,0,3,1,0,0,0,0,5,1,0,0,0,0,
|
|
25
|
+
7,1,0,0,0,0,9,1,0,0,0,0,11,1,0,0,0,0,13,1,0,0,0,0,15,1,0,0,0,0,17,
|
|
26
|
+
1,0,0,0,0,19,1,0,0,0,0,21,1,0,0,0,0,23,1,0,0,0,0,25,1,0,0,0,0,27,
|
|
27
|
+
1,0,0,0,0,29,1,0,0,0,0,31,1,0,0,0,0,33,1,0,0,0,1,35,1,0,0,0,3,37,
|
|
28
|
+
1,0,0,0,5,39,1,0,0,0,7,41,1,0,0,0,9,43,1,0,0,0,11,45,1,0,0,0,13,
|
|
29
|
+
48,1,0,0,0,15,51,1,0,0,0,17,54,1,0,0,0,19,57,1,0,0,0,21,59,1,0,0,
|
|
30
|
+
0,23,63,1,0,0,0,25,66,1,0,0,0,27,70,1,0,0,0,29,72,1,0,0,0,31,84,
|
|
31
|
+
1,0,0,0,33,88,1,0,0,0,35,36,5,40,0,0,36,2,1,0,0,0,37,38,5,41,0,0,
|
|
32
|
+
38,4,1,0,0,0,39,40,5,61,0,0,40,6,1,0,0,0,41,42,5,62,0,0,42,8,1,0,
|
|
33
|
+
0,0,43,44,5,60,0,0,44,10,1,0,0,0,45,46,5,62,0,0,46,47,5,61,0,0,47,
|
|
34
|
+
12,1,0,0,0,48,49,5,60,0,0,49,50,5,61,0,0,50,14,1,0,0,0,51,52,5,60,
|
|
35
|
+
0,0,52,53,5,62,0,0,53,16,1,0,0,0,54,55,5,61,0,0,55,56,5,61,0,0,56,
|
|
36
|
+
18,1,0,0,0,57,58,5,47,0,0,58,20,1,0,0,0,59,60,7,0,0,0,60,61,7,1,
|
|
37
|
+
0,0,61,62,7,2,0,0,62,22,1,0,0,0,63,64,7,3,0,0,64,65,7,4,0,0,65,24,
|
|
38
|
+
1,0,0,0,66,67,7,1,0,0,67,68,7,3,0,0,68,69,7,5,0,0,69,26,1,0,0,0,
|
|
39
|
+
70,71,5,46,0,0,71,28,1,0,0,0,72,78,5,34,0,0,73,74,5,92,0,0,74,77,
|
|
40
|
+
7,6,0,0,75,77,8,6,0,0,76,73,1,0,0,0,76,75,1,0,0,0,77,80,1,0,0,0,
|
|
41
|
+
78,76,1,0,0,0,78,79,1,0,0,0,79,81,1,0,0,0,80,78,1,0,0,0,81,82,5,
|
|
42
|
+
34,0,0,82,30,1,0,0,0,83,85,8,7,0,0,84,83,1,0,0,0,85,86,1,0,0,0,86,
|
|
43
|
+
84,1,0,0,0,86,87,1,0,0,0,87,32,1,0,0,0,88,89,7,8,0,0,89,90,1,0,0,
|
|
44
|
+
0,90,91,6,16,0,0,91,34,1,0,0,0,4,0,76,78,86,1,0,1,0
|
|
45
|
+
]
|
|
46
|
+
|
|
47
|
+
class LexLexer(Lexer):
|
|
48
|
+
|
|
49
|
+
atn = ATNDeserializer().deserialize(serializedATN())
|
|
50
|
+
|
|
51
|
+
decisionsToDFA = [ DFA(ds, i) for i, ds in enumerate(atn.decisionToState) ]
|
|
52
|
+
|
|
53
|
+
L_PAREN = 1
|
|
54
|
+
R_PAREN = 2
|
|
55
|
+
EQUAL = 3
|
|
56
|
+
GREATER = 4
|
|
57
|
+
LESSER = 5
|
|
58
|
+
GREATER_EQUAL = 6
|
|
59
|
+
LESSER_EQUAL = 7
|
|
60
|
+
NOT_EQUAL = 8
|
|
61
|
+
EQUAL_EQUAL = 9
|
|
62
|
+
SLASH = 10
|
|
63
|
+
AND = 11
|
|
64
|
+
OR = 12
|
|
65
|
+
NOT = 13
|
|
66
|
+
DOT = 14
|
|
67
|
+
QUOTED_STRING = 15
|
|
68
|
+
SIMPLE_STRING = 16
|
|
69
|
+
WS = 17
|
|
70
|
+
|
|
71
|
+
channelNames = [ u"DEFAULT_TOKEN_CHANNEL", u"HIDDEN" ]
|
|
72
|
+
|
|
73
|
+
modeNames = [ "DEFAULT_MODE" ]
|
|
74
|
+
|
|
75
|
+
literalNames = [ "<INVALID>",
|
|
76
|
+
"'('", "')'", "'='", "'>'", "'<'", "'>='", "'<='", "'<>'", "'=='",
|
|
77
|
+
"'/'", "'.'" ]
|
|
78
|
+
|
|
79
|
+
symbolicNames = [ "<INVALID>",
|
|
80
|
+
"L_PAREN", "R_PAREN", "EQUAL", "GREATER", "LESSER", "GREATER_EQUAL",
|
|
81
|
+
"LESSER_EQUAL", "NOT_EQUAL", "EQUAL_EQUAL", "SLASH", "AND",
|
|
82
|
+
"OR", "NOT", "DOT", "QUOTED_STRING", "SIMPLE_STRING", "WS" ]
|
|
83
|
+
|
|
84
|
+
ruleNames = [ "L_PAREN", "R_PAREN", "EQUAL", "GREATER", "LESSER", "GREATER_EQUAL",
|
|
85
|
+
"LESSER_EQUAL", "NOT_EQUAL", "EQUAL_EQUAL", "SLASH", "AND",
|
|
86
|
+
"OR", "NOT", "DOT", "QUOTED_STRING", "SIMPLE_STRING",
|
|
87
|
+
"WS" ]
|
|
88
|
+
|
|
89
|
+
grammarFileName = "LexLexer.g4"
|
|
90
|
+
|
|
91
|
+
def __init__(self, input=None, output:TextIO = sys.stdout):
|
|
92
|
+
super().__init__(input, output)
|
|
93
|
+
self.checkVersion("4.13.2")
|
|
94
|
+
self._interp = LexerATNSimulator(self, self.atn, self.decisionsToDFA, PredictionContextCache())
|
|
95
|
+
self._actions = None
|
|
96
|
+
self._predicates = None
|
|
97
|
+
|
|
98
|
+
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
L_PAREN=1
|
|
2
|
+
R_PAREN=2
|
|
3
|
+
EQUAL=3
|
|
4
|
+
GREATER=4
|
|
5
|
+
LESSER=5
|
|
6
|
+
GREATER_EQUAL=6
|
|
7
|
+
LESSER_EQUAL=7
|
|
8
|
+
NOT_EQUAL=8
|
|
9
|
+
EQUAL_EQUAL=9
|
|
10
|
+
SLASH=10
|
|
11
|
+
AND=11
|
|
12
|
+
OR=12
|
|
13
|
+
NOT=13
|
|
14
|
+
DOT=14
|
|
15
|
+
QUOTED_STRING=15
|
|
16
|
+
SIMPLE_STRING=16
|
|
17
|
+
WS=17
|
|
18
|
+
'('=1
|
|
19
|
+
')'=2
|
|
20
|
+
'='=3
|
|
21
|
+
'>'=4
|
|
22
|
+
'<'=5
|
|
23
|
+
'>='=6
|
|
24
|
+
'<='=7
|
|
25
|
+
'<>'=8
|
|
26
|
+
'=='=9
|
|
27
|
+
'/'=10
|
|
28
|
+
'.'=14
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
parser grammar LexParser;
|
|
2
|
+
options {
|
|
3
|
+
tokenVocab = LexLexer;
|
|
4
|
+
}
|
|
5
|
+
|
|
6
|
+
/* ABNF: https://docs.oasis-open.org/search-ws/searchRetrieve/v1.0/os/part5-cql/searchRetrieve-v1.0-os-part5-cql.html */
|
|
7
|
+
/* Test in: http://lab.antlr.org/ */
|
|
8
|
+
/* Slides: https://www.esa.informatik.tu-darmstadt.de/archive/twiki/pub/Lectures/Compiler113De/antlr-v4-handout.pdf */
|
|
9
|
+
|
|
10
|
+
/* search clauses */
|
|
11
|
+
|
|
12
|
+
// ! original ABNF rule: does not work due to left-recursion
|
|
13
|
+
//
|
|
14
|
+
// query: search_clause_group;
|
|
15
|
+
// search_clause_group: (search_clause_group boolean_modified subquery) | subquery;
|
|
16
|
+
// subquery: (L_PAREN query R_PAREN) | search_clause;
|
|
17
|
+
|
|
18
|
+
// flat lists as alternative? need to be interpreted left to right
|
|
19
|
+
//
|
|
20
|
+
// search_clause_group: (subquery boolean_modified)* subquery;
|
|
21
|
+
// search_clause_group: subquery (boolean_modified subquery)*;
|
|
22
|
+
|
|
23
|
+
// simplify and make it flat
|
|
24
|
+
query: boolean_query EOF;
|
|
25
|
+
|
|
26
|
+
boolean_query: subquery (boolean_modified subquery)*;
|
|
27
|
+
|
|
28
|
+
subquery: L_PAREN boolean_query R_PAREN | search_clause;
|
|
29
|
+
|
|
30
|
+
search_clause: (index relation_modified)? search_term;
|
|
31
|
+
|
|
32
|
+
search_term: SIMPLE_STRING | QUOTED_STRING;
|
|
33
|
+
|
|
34
|
+
/* indexes */
|
|
35
|
+
|
|
36
|
+
index: simple_name | prefix_name;
|
|
37
|
+
|
|
38
|
+
/* relations */
|
|
39
|
+
|
|
40
|
+
relation_modified: relation modifier_list?;
|
|
41
|
+
|
|
42
|
+
relation: relation_name | relation_symbol;
|
|
43
|
+
|
|
44
|
+
relation_name: simple_name | prefix_name;
|
|
45
|
+
|
|
46
|
+
relation_symbol:
|
|
47
|
+
EQUAL
|
|
48
|
+
| GREATER
|
|
49
|
+
| LESSER
|
|
50
|
+
| GREATER_EQUAL
|
|
51
|
+
| LESSER_EQUAL
|
|
52
|
+
| NOT_EQUAL
|
|
53
|
+
| EQUAL_EQUAL;
|
|
54
|
+
|
|
55
|
+
/* booleans */
|
|
56
|
+
|
|
57
|
+
boolean_modified: r_boolean modifier_list?;
|
|
58
|
+
|
|
59
|
+
r_boolean: AND | OR | NOT;
|
|
60
|
+
|
|
61
|
+
/* modifiers */
|
|
62
|
+
|
|
63
|
+
modifier_list: modifier+;
|
|
64
|
+
|
|
65
|
+
modifier: SLASH modifier_name modifier_relation?;
|
|
66
|
+
|
|
67
|
+
modifier_name: simple_name;
|
|
68
|
+
|
|
69
|
+
modifier_relation: relation_symbol modifier_value;
|
|
70
|
+
|
|
71
|
+
modifier_value: SIMPLE_STRING | QUOTED_STRING;
|
|
72
|
+
|
|
73
|
+
/* terminal aliases */
|
|
74
|
+
|
|
75
|
+
prefix_name: prefix DOT simple_name;
|
|
76
|
+
|
|
77
|
+
prefix: simple_name;
|
|
78
|
+
|
|
79
|
+
simple_name: SIMPLE_STRING;
|