flowquery 1.0.17 → 1.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/python-publish.yml +8 -9
- package/dist/flowquery.min.js +1 -1
- package/dist/graph/pattern_expression.d.ts +1 -0
- package/dist/graph/pattern_expression.d.ts.map +1 -1
- package/dist/graph/pattern_expression.js +14 -3
- package/dist/graph/pattern_expression.js.map +1 -1
- package/dist/parsing/parser.d.ts.map +1 -1
- package/dist/parsing/parser.js +6 -5
- package/dist/parsing/parser.js.map +1 -1
- package/docs/flowquery.min.js +1 -1
- package/flowquery-py/CONTRIBUTING.md +127 -0
- package/flowquery-py/README.md +13 -112
- package/flowquery-py/pyproject.toml +1 -1
- package/flowquery-py/src/graph/pattern_expression.py +6 -3
- package/flowquery-py/src/io/command_line.py +44 -2
- package/flowquery-py/src/parsing/base_parser.py +2 -2
- package/flowquery-py/src/parsing/operations/load.py +6 -0
- package/flowquery-py/src/parsing/parser.py +4 -5
- package/flowquery-py/src/tokenization/token.py +122 -176
- package/flowquery-py/src/tokenization/tokenizer.py +4 -4
- package/flowquery-py/tests/parsing/test_parser.py +6 -0
- package/flowquery-vscode/flowQueryEngine/flowquery.min.js +1 -1
- package/package.json +1 -1
- package/src/graph/pattern_expression.ts +14 -3
- package/src/parsing/parser.ts +7 -4
- package/tests/parsing/parser.test.ts +8 -0
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# Contributing to FlowQuery Python
|
|
2
|
+
|
|
3
|
+
This guide covers setting up a development environment and contributing to the Python implementation of FlowQuery.
|
|
4
|
+
|
|
5
|
+
## Development Setup
|
|
6
|
+
|
|
7
|
+
### From Source
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
git clone https://github.com/microsoft/FlowQuery.git
|
|
11
|
+
cd FlowQuery/flowquery-py
|
|
12
|
+
pip install -e .
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
### With Development Dependencies
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
pip install -e ".[dev]"
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
### Using Conda (Recommended)
|
|
22
|
+
|
|
23
|
+
**Windows (PowerShell):**
|
|
24
|
+
|
|
25
|
+
```powershell
|
|
26
|
+
cd flowquery-py
|
|
27
|
+
.\setup_env.ps1
|
|
28
|
+
conda activate flowquery
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
**Linux/macOS:**
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
cd flowquery-py
|
|
35
|
+
chmod +x setup_env.sh
|
|
36
|
+
./setup_env.sh
|
|
37
|
+
conda activate flowquery
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
The setup scripts automatically:
|
|
41
|
+
|
|
42
|
+
1. Read the Python version from `pyproject.toml`
|
|
43
|
+
2. Create a conda environment named `flowquery`
|
|
44
|
+
3. Install the package with all dev dependencies
|
|
45
|
+
|
|
46
|
+
## Requirements
|
|
47
|
+
|
|
48
|
+
- Python 3.10+ (defined in `pyproject.toml`)
|
|
49
|
+
- pytest (for running tests)
|
|
50
|
+
- pytest-asyncio (for async test support)
|
|
51
|
+
- aiohttp (for HTTP requests)
|
|
52
|
+
|
|
53
|
+
All dependencies are managed in `pyproject.toml`.
|
|
54
|
+
|
|
55
|
+
## Running Tests
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
pytest tests/
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Project Structure
|
|
62
|
+
|
|
63
|
+
```
|
|
64
|
+
flowquery-py/
|
|
65
|
+
├── pyproject.toml # Dependencies & project config (single source of truth)
|
|
66
|
+
├── setup_env.ps1 # Windows conda setup script
|
|
67
|
+
├── setup_env.sh # Linux/macOS conda setup script
|
|
68
|
+
├── README.md
|
|
69
|
+
├── src/
|
|
70
|
+
│ ├── __init__.py # Main package entry point
|
|
71
|
+
│ ├── extensibility.py # Public API for custom functions
|
|
72
|
+
│ ├── compute/
|
|
73
|
+
│ │ └── runner.py # Query execution engine
|
|
74
|
+
│ ├── graph/
|
|
75
|
+
│ │ ├── node.py # Graph node representation
|
|
76
|
+
│ │ ├── relationship.py # Graph relationship representation
|
|
77
|
+
│ │ ├── pattern.py # Pattern matching
|
|
78
|
+
│ │ └── database.py # In-memory graph database
|
|
79
|
+
│ ├── io/
|
|
80
|
+
│ │ └── command_line.py # Interactive REPL
|
|
81
|
+
│ ├── parsing/
|
|
82
|
+
│ │ ├── parser.py # Main parser
|
|
83
|
+
│ │ ├── ast_node.py # AST node base class
|
|
84
|
+
│ │ ├── expressions/ # Expression types (numbers, strings, operators)
|
|
85
|
+
│ │ ├── functions/ # Built-in and custom functions
|
|
86
|
+
│ │ ├── operations/ # Query operations (WITH, RETURN, UNWIND, etc.)
|
|
87
|
+
│ │ ├── components/ # LOAD clause components
|
|
88
|
+
│ │ ├── data_structures/ # Arrays, objects, lookups
|
|
89
|
+
│ │ └── logic/ # CASE/WHEN/THEN/ELSE
|
|
90
|
+
│ ├── tokenization/
|
|
91
|
+
│ │ ├── tokenizer.py # Lexer
|
|
92
|
+
│ │ ├── token.py # Token class
|
|
93
|
+
│ │ └── ... # Token types and mappers
|
|
94
|
+
│ └── utils/
|
|
95
|
+
│ ├── string_utils.py # String manipulation utilities
|
|
96
|
+
│ └── object_utils.py # Object utilities
|
|
97
|
+
└── tests/
|
|
98
|
+
├── test_extensibility.py
|
|
99
|
+
├── compute/
|
|
100
|
+
│ └── test_runner.py
|
|
101
|
+
├── graph/
|
|
102
|
+
│ ├── test_create.py
|
|
103
|
+
│ ├── test_data.py
|
|
104
|
+
│ └── test_match.py
|
|
105
|
+
├── parsing/
|
|
106
|
+
│ ├── test_parser.py
|
|
107
|
+
│ ├── test_context.py
|
|
108
|
+
│ └── test_expression.py
|
|
109
|
+
└── tokenization/
|
|
110
|
+
├── test_tokenizer.py
|
|
111
|
+
├── test_token_mapper.py
|
|
112
|
+
└── test_trie.py
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Code Style
|
|
116
|
+
|
|
117
|
+
- Follow PEP 8 guidelines
|
|
118
|
+
- Use type hints where appropriate
|
|
119
|
+
- Write docstrings for public APIs
|
|
120
|
+
|
|
121
|
+
## Submitting Changes
|
|
122
|
+
|
|
123
|
+
1. Fork the repository
|
|
124
|
+
2. Create a feature branch
|
|
125
|
+
3. Make your changes
|
|
126
|
+
4. Run tests: `pytest tests/`
|
|
127
|
+
5. Submit a pull request
|
package/flowquery-py/README.md
CHANGED
|
@@ -1,68 +1,24 @@
|
|
|
1
|
-
# FlowQuery
|
|
1
|
+
# FlowQuery
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
A declarative query language for data processing pipelines.
|
|
4
4
|
|
|
5
5
|
## Installation
|
|
6
6
|
|
|
7
|
-
### From Source
|
|
8
|
-
|
|
9
7
|
```bash
|
|
10
|
-
|
|
11
|
-
cd FlowQuery/flowquery-py
|
|
12
|
-
pip install -e .
|
|
13
|
-
```
|
|
14
|
-
|
|
15
|
-
### With Development Dependencies
|
|
16
|
-
|
|
17
|
-
```bash
|
|
18
|
-
pip install -e ".[dev]"
|
|
8
|
+
pip install flowquery
|
|
19
9
|
```
|
|
20
10
|
|
|
21
11
|
## Quick Start
|
|
22
12
|
|
|
23
13
|
### Command Line Interface
|
|
24
14
|
|
|
25
|
-
|
|
15
|
+
Start the interactive REPL:
|
|
26
16
|
|
|
27
17
|
```bash
|
|
28
18
|
flowquery
|
|
29
19
|
```
|
|
30
20
|
|
|
31
|
-
### Using Conda (Recommended)
|
|
32
|
-
|
|
33
|
-
**Windows (PowerShell):**
|
|
34
|
-
|
|
35
|
-
```powershell
|
|
36
|
-
cd flowquery-py
|
|
37
|
-
.\setup_env.ps1
|
|
38
|
-
conda activate flowquery
|
|
39
|
-
```
|
|
40
|
-
|
|
41
|
-
**Linux/macOS:**
|
|
42
|
-
|
|
43
|
-
```bash
|
|
44
|
-
cd flowquery-py
|
|
45
|
-
chmod +x setup_env.sh
|
|
46
|
-
./setup_env.sh
|
|
47
|
-
conda activate flowquery
|
|
48
|
-
```
|
|
49
|
-
|
|
50
|
-
The setup scripts automatically:
|
|
51
|
-
|
|
52
|
-
1. Read the Python version from `pyproject.toml`
|
|
53
|
-
2. Create a conda environment named `flowquery`
|
|
54
|
-
3. Install the package with all dev dependencies
|
|
55
|
-
|
|
56
|
-
## Requirements
|
|
57
|
-
|
|
58
|
-
- Python 3.10+ (defined in `pyproject.toml`)
|
|
59
|
-
- pytest (for running tests)
|
|
60
|
-
- pytest-asyncio (for async test support)
|
|
61
|
-
- aiohttp (for HTTP requests)
|
|
62
|
-
|
|
63
|
-
All dependencies are managed in `pyproject.toml`.
|
|
64
|
-
|
|
65
|
-
## Programmatic Usage
|
|
21
|
+
### Programmatic Usage
|
|
66
22
|
|
|
67
23
|
```python
|
|
68
24
|
import asyncio
|
|
@@ -73,66 +29,6 @@ asyncio.run(runner.run())
|
|
|
73
29
|
print(runner.results) # [{'result': 2}]
|
|
74
30
|
```
|
|
75
31
|
|
|
76
|
-
## Running Tests
|
|
77
|
-
|
|
78
|
-
```bash
|
|
79
|
-
pytest tests/
|
|
80
|
-
```
|
|
81
|
-
|
|
82
|
-
## Project Structure
|
|
83
|
-
|
|
84
|
-
```
|
|
85
|
-
flowquery-py/
|
|
86
|
-
├── pyproject.toml # Dependencies & project config (single source of truth)
|
|
87
|
-
├── setup_env.ps1 # Windows conda setup script
|
|
88
|
-
├── setup_env.sh # Linux/macOS conda setup script
|
|
89
|
-
├── README.md
|
|
90
|
-
├── src/
|
|
91
|
-
│ ├── __init__.py # Main package entry point
|
|
92
|
-
│ ├── extensibility.py # Public API for custom functions
|
|
93
|
-
│ ├── compute/
|
|
94
|
-
│ │ └── runner.py # Query execution engine
|
|
95
|
-
│ ├── graph/
|
|
96
|
-
│ │ ├── node.py # Graph node representation
|
|
97
|
-
│ │ ├── relationship.py # Graph relationship representation
|
|
98
|
-
│ │ ├── pattern.py # Pattern matching
|
|
99
|
-
│ │ └── database.py # In-memory graph database
|
|
100
|
-
│ ├── io/
|
|
101
|
-
│ │ └── command_line.py # Interactive REPL
|
|
102
|
-
│ ├── parsing/
|
|
103
|
-
│ │ ├── parser.py # Main parser
|
|
104
|
-
│ │ ├── ast_node.py # AST node base class
|
|
105
|
-
│ │ ├── expressions/ # Expression types (numbers, strings, operators)
|
|
106
|
-
│ │ ├── functions/ # Built-in and custom functions
|
|
107
|
-
│ │ ├── operations/ # Query operations (WITH, RETURN, UNWIND, etc.)
|
|
108
|
-
│ │ ├── components/ # LOAD clause components
|
|
109
|
-
│ │ ├── data_structures/ # Arrays, objects, lookups
|
|
110
|
-
│ │ └── logic/ # CASE/WHEN/THEN/ELSE
|
|
111
|
-
│ ├── tokenization/
|
|
112
|
-
│ │ ├── tokenizer.py # Lexer
|
|
113
|
-
│ │ ├── token.py # Token class
|
|
114
|
-
│ │ └── ... # Token types and mappers
|
|
115
|
-
│ └── utils/
|
|
116
|
-
│ ├── string_utils.py # String manipulation utilities
|
|
117
|
-
│ └── object_utils.py # Object utilities
|
|
118
|
-
└── tests/
|
|
119
|
-
├── test_extensibility.py
|
|
120
|
-
├── compute/
|
|
121
|
-
│ └── test_runner.py
|
|
122
|
-
├── graph/
|
|
123
|
-
│ ├── test_create.py
|
|
124
|
-
│ ├── test_data.py
|
|
125
|
-
│ └── test_match.py
|
|
126
|
-
├── parsing/
|
|
127
|
-
│ ├── test_parser.py
|
|
128
|
-
│ ├── test_context.py
|
|
129
|
-
│ └── test_expression.py
|
|
130
|
-
└── tokenization/
|
|
131
|
-
├── test_tokenizer.py
|
|
132
|
-
├── test_token_mapper.py
|
|
133
|
-
└── test_trie.py
|
|
134
|
-
```
|
|
135
|
-
|
|
136
32
|
## Creating Custom Functions
|
|
137
33
|
|
|
138
34
|
```python
|
|
@@ -155,12 +51,17 @@ class UpperCase(Function):
|
|
|
155
51
|
return str(self.get_children()[0].value()).upper()
|
|
156
52
|
```
|
|
157
53
|
|
|
54
|
+
## Documentation
|
|
55
|
+
|
|
56
|
+
- [Full Documentation](https://github.com/microsoft/FlowQuery)
|
|
57
|
+
- [Contributing Guide](https://github.com/microsoft/FlowQuery/blob/main/flowquery-py/CONTRIBUTING.md)
|
|
58
|
+
|
|
158
59
|
## License
|
|
159
60
|
|
|
160
|
-
MIT License - see [LICENSE](LICENSE) for details.
|
|
61
|
+
MIT License - see [LICENSE](https://github.com/microsoft/FlowQuery/blob/main/LICENSE) for details.
|
|
161
62
|
|
|
162
63
|
## Links
|
|
163
64
|
|
|
164
|
-
- [Homepage](https://github.com/microsoft/FlowQuery
|
|
165
|
-
- [Repository](https://github.com/microsoft/FlowQuery
|
|
65
|
+
- [Homepage](https://github.com/microsoft/FlowQuery)
|
|
66
|
+
- [Repository](https://github.com/microsoft/FlowQuery)
|
|
166
67
|
- [Issues](https://github.com/microsoft/FlowQuery/issues)
|
|
@@ -20,11 +20,14 @@ class PatternExpression(Pattern):
|
|
|
20
20
|
self._evaluation: bool = False
|
|
21
21
|
|
|
22
22
|
def add_element(self, element) -> None:
|
|
23
|
-
"""Add an element to the pattern, ensuring it starts with a NodeReference."""
|
|
24
|
-
if len(self._chain) == 0 and not isinstance(element, NodeReference):
|
|
25
|
-
raise ValueError("PatternExpression must start with a NodeReference")
|
|
26
23
|
super().add_element(element)
|
|
27
24
|
|
|
25
|
+
def verify(self) -> None:
|
|
26
|
+
if(len(self._chain) == 0):
|
|
27
|
+
raise ValueError("PatternExpression cannot be empty")
|
|
28
|
+
if not(any(isinstance(el, NodeReference) for el in self._chain if isinstance(el, ASTNode))):
|
|
29
|
+
raise ValueError("PatternExpression must contain at least one NodeReference")
|
|
30
|
+
|
|
28
31
|
@property
|
|
29
32
|
def identifier(self):
|
|
30
33
|
return None
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Interactive command-line interface for FlowQuery."""
|
|
2
2
|
|
|
3
|
+
import argparse
|
|
3
4
|
import asyncio
|
|
4
5
|
from typing import Optional
|
|
5
6
|
|
|
@@ -15,8 +16,26 @@ class CommandLine:
|
|
|
15
16
|
Example:
|
|
16
17
|
cli = CommandLine()
|
|
17
18
|
cli.loop() # Starts interactive mode
|
|
19
|
+
|
|
20
|
+
# Or execute a single query:
|
|
21
|
+
cli.execute("load json from 'https://example.com/data' as d return d")
|
|
18
22
|
"""
|
|
19
23
|
|
|
24
|
+
def execute(self, query: str) -> None:
|
|
25
|
+
"""Execute a single FlowQuery statement and print results.
|
|
26
|
+
|
|
27
|
+
Args:
|
|
28
|
+
query: The FlowQuery statement to execute.
|
|
29
|
+
"""
|
|
30
|
+
# Remove the termination semicolon if present
|
|
31
|
+
query = query.strip().rstrip(";")
|
|
32
|
+
|
|
33
|
+
try:
|
|
34
|
+
runner = Runner(query)
|
|
35
|
+
asyncio.run(self._execute(runner))
|
|
36
|
+
except Exception as e:
|
|
37
|
+
print(f"Error: {e}")
|
|
38
|
+
|
|
20
39
|
def loop(self) -> None:
|
|
21
40
|
"""Starts the interactive command loop.
|
|
22
41
|
|
|
@@ -63,5 +82,28 @@ class CommandLine:
|
|
|
63
82
|
|
|
64
83
|
|
|
65
84
|
def main() -> None:
|
|
66
|
-
"""Entry point for the flowquery CLI command.
|
|
67
|
-
|
|
85
|
+
"""Entry point for the flowquery CLI command.
|
|
86
|
+
|
|
87
|
+
Usage:
|
|
88
|
+
flowquery # Start interactive mode
|
|
89
|
+
flowquery -c "query" # Execute a single query
|
|
90
|
+
flowquery --command "query"
|
|
91
|
+
"""
|
|
92
|
+
parser = argparse.ArgumentParser(
|
|
93
|
+
description="FlowQuery - A declarative query language for data processing pipelines",
|
|
94
|
+
prog="flowquery"
|
|
95
|
+
)
|
|
96
|
+
parser.add_argument(
|
|
97
|
+
"-c", "--command",
|
|
98
|
+
type=str,
|
|
99
|
+
metavar="QUERY",
|
|
100
|
+
help="Execute a FlowQuery statement and exit"
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
args = parser.parse_args()
|
|
104
|
+
cli = CommandLine()
|
|
105
|
+
|
|
106
|
+
if args.command:
|
|
107
|
+
cli.execute(args.command)
|
|
108
|
+
else:
|
|
109
|
+
cli.loop()
|
|
@@ -69,7 +69,7 @@ class BaseParser:
|
|
|
69
69
|
The current token, or EOF if at the end
|
|
70
70
|
"""
|
|
71
71
|
if self._token_index >= len(self._tokens):
|
|
72
|
-
return Token.EOF
|
|
72
|
+
return Token.EOF()
|
|
73
73
|
return self._tokens[self._token_index]
|
|
74
74
|
|
|
75
75
|
@property
|
|
@@ -80,5 +80,5 @@ class BaseParser:
|
|
|
80
80
|
The previous token, or EOF if at the beginning
|
|
81
81
|
"""
|
|
82
82
|
if self._token_index - 1 < 0:
|
|
83
|
-
return Token.EOF
|
|
83
|
+
return Token.EOF()
|
|
84
84
|
return self._tokens[self._token_index - 1]
|
|
@@ -96,6 +96,12 @@ class Load(Operation):
|
|
|
96
96
|
headers = options.pop("headers", {})
|
|
97
97
|
body = options.pop("body", None)
|
|
98
98
|
|
|
99
|
+
# Set Accept-Encoding to support common compression formats
|
|
100
|
+
# Note: brotli (br) is excluded due to API incompatibility between
|
|
101
|
+
# aiohttp 3.13+ and the brotli package's Decompressor.decompress() method
|
|
102
|
+
if "Accept-Encoding" not in headers:
|
|
103
|
+
headers["Accept-Encoding"] = "gzip, deflate"
|
|
104
|
+
|
|
99
105
|
async with session.request(
|
|
100
106
|
method,
|
|
101
107
|
self.from_,
|
|
@@ -426,8 +426,6 @@ class Parser(BaseParser):
|
|
|
426
426
|
node = self._parse_node()
|
|
427
427
|
if node is None:
|
|
428
428
|
raise ValueError("Expected node definition")
|
|
429
|
-
if not isinstance(node, NodeReference):
|
|
430
|
-
raise ValueError("PatternExpression must start with a NodeReference")
|
|
431
429
|
pattern.add_element(node)
|
|
432
430
|
while True:
|
|
433
431
|
relationship = self._parse_relationship()
|
|
@@ -440,6 +438,7 @@ class Parser(BaseParser):
|
|
|
440
438
|
if node is None:
|
|
441
439
|
raise ValueError("Expected target node definition")
|
|
442
440
|
pattern.add_element(node)
|
|
441
|
+
pattern.verify()
|
|
443
442
|
return pattern
|
|
444
443
|
|
|
445
444
|
def _parse_node(self) -> Optional[Node]:
|
|
@@ -606,7 +605,7 @@ class Parser(BaseParser):
|
|
|
606
605
|
if func is not None:
|
|
607
606
|
lookup = self._parse_lookup(func)
|
|
608
607
|
expression.add_node(lookup)
|
|
609
|
-
elif self.token.is_left_parenthesis() and self.peek() is not None and self.peek().is_identifier():
|
|
608
|
+
elif self.token.is_left_parenthesis() and self.peek() is not None and (self.peek().is_identifier() or self.peek().is_colon() or self.peek().is_right_parenthesis()):
|
|
610
609
|
# Possible graph pattern expression
|
|
611
610
|
pattern = self._parse_pattern_expression()
|
|
612
611
|
if pattern is not None:
|
|
@@ -795,9 +794,9 @@ class Parser(BaseParser):
|
|
|
795
794
|
# Lookahead: identifier ( identifier in
|
|
796
795
|
if not self.ahead([
|
|
797
796
|
Token.IDENTIFIER(""),
|
|
798
|
-
Token.LEFT_PARENTHESIS,
|
|
797
|
+
Token.LEFT_PARENTHESIS(),
|
|
799
798
|
Token.IDENTIFIER(""),
|
|
800
|
-
Token.IN,
|
|
799
|
+
Token.IN(),
|
|
801
800
|
]):
|
|
802
801
|
return None
|
|
803
802
|
if self.token.value is None:
|