file_query_text 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- file_query_text-0.1.0/PKG-INFO +110 -0
- file_query_text-0.1.0/README.md +94 -0
- file_query_text-0.1.0/file_query_text/__init__.py +3 -0
- file_query_text-0.1.0/file_query_text/cli.py +66 -0
- file_query_text-0.1.0/file_query_text/grammar.py +63 -0
- file_query_text-0.1.0/file_query_text/main.py +110 -0
- file_query_text-0.1.0/file_query_text.egg-info/PKG-INFO +110 -0
- file_query_text-0.1.0/file_query_text.egg-info/SOURCES.txt +13 -0
- file_query_text-0.1.0/file_query_text.egg-info/dependency_links.txt +1 -0
- file_query_text-0.1.0/file_query_text.egg-info/entry_points.txt +2 -0
- file_query_text-0.1.0/file_query_text.egg-info/requires.txt +4 -0
- file_query_text-0.1.0/file_query_text.egg-info/top_level.txt +1 -0
- file_query_text-0.1.0/pyproject.toml +42 -0
- file_query_text-0.1.0/setup.cfg +4 -0
- file_query_text-0.1.0/tests/test_main.py +376 -0
@@ -0,0 +1,110 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: file_query_text
|
3
|
+
Version: 0.1.0
|
4
|
+
Summary: SQL-like interface for querying files in your filesystem
|
5
|
+
Author-email: nik <42a11b@nikdav.is>
|
6
|
+
License-Expression: MIT
|
7
|
+
Project-URL: Homepage, https://github.com/nikdavis/file_query_text
|
8
|
+
Project-URL: Bug Tracker, https://github.com/nikdavis/file_query_text/issues
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
10
|
+
Classifier: Operating System :: OS Independent
|
11
|
+
Requires-Python: >=3.12
|
12
|
+
Description-Content-Type: text/markdown
|
13
|
+
Requires-Dist: pyparsing>=3.2.3
|
14
|
+
Provides-Extra: dev
|
15
|
+
Requires-Dist: pytest>=8.3.5; extra == "dev"
|
16
|
+
|
17
|
+
# File Query
|
18
|
+
|
19
|
+
A SQL-like interface for querying files in your filesystem.
|
20
|
+
|
21
|
+
## Installation
|
22
|
+
|
23
|
+
```bash
|
24
|
+
# Clone the repository
|
25
|
+
git clone https://github.com/yourusername/file-query.git
|
26
|
+
cd file-query
|
27
|
+
|
28
|
+
# Install with pip
|
29
|
+
pip install -e .
|
30
|
+
|
31
|
+
# Or use UV
|
32
|
+
uv run python -m src.cli "your query"
|
33
|
+
|
34
|
+
# Install as a permanent tool with UV
|
35
|
+
uv tool install .
|
36
|
+
# This will install the 'fq' command
|
37
|
+
```
|
38
|
+
|
39
|
+
## Usage
|
40
|
+
|
41
|
+
### Command Line
|
42
|
+
|
43
|
+
The quickest way to run file-query is with UV:
|
44
|
+
|
45
|
+
```bash
|
46
|
+
uv run python -m src.cli "your query here"
|
47
|
+
```
|
48
|
+
|
49
|
+
After installation, you can use the shorthand command:
|
50
|
+
|
51
|
+
```bash
|
52
|
+
fq "your query here"
|
53
|
+
```
|
54
|
+
|
55
|
+
#### Basic Usage
|
56
|
+
|
57
|
+
```bash
|
58
|
+
# Find all Python files
|
59
|
+
fq "extension == 'py'"
|
60
|
+
|
61
|
+
# Find all text files and show their content
|
62
|
+
fq "extension == 'txt'" --show-content
|
63
|
+
```
|
64
|
+
|
65
|
+
#### Advanced Queries
|
66
|
+
|
67
|
+
File Query supports full SQL-like syntax:
|
68
|
+
|
69
|
+
```bash
|
70
|
+
# Find all Python files in the src directory
|
71
|
+
fq "SELECT * FROM 'src' WHERE extension == 'py'"
|
72
|
+
|
73
|
+
# Find all files larger than 100KB
|
74
|
+
fq "SELECT * FROM '.' WHERE size > 102400"
|
75
|
+
|
76
|
+
# Complex conditions
|
77
|
+
fq "SELECT * FROM '.' WHERE (extension == 'pdf' AND size > 1000000) OR (extension == 'txt' AND NOT name == 'README.txt')"
|
78
|
+
```
|
79
|
+
|
80
|
+
## Query Syntax
|
81
|
+
|
82
|
+
File Query uses a SQL-like syntax:
|
83
|
+
|
84
|
+
```sql
|
85
|
+
SELECT * FROM 'directory_path' WHERE condition
|
86
|
+
```
|
87
|
+
|
88
|
+
### Available Attributes
|
89
|
+
|
90
|
+
- `extension`: File extension (without the dot)
|
91
|
+
- `name`: Filename with extension
|
92
|
+
- `size`: File size in bytes
|
93
|
+
|
94
|
+
### Operators
|
95
|
+
|
96
|
+
- Comparison: `==`, `!=`, `<`, `<=`, `>`, `>=`
|
97
|
+
- Logical: `AND`, `OR`, `NOT`
|
98
|
+
|
99
|
+
## Examples
|
100
|
+
|
101
|
+
```bash
|
102
|
+
# Find all PDF files
|
103
|
+
fq "extension == 'pdf'"
|
104
|
+
|
105
|
+
# Find all files not named "main.py"
|
106
|
+
fq "NOT name == 'main.py'"
|
107
|
+
|
108
|
+
# Find all large image files
|
109
|
+
fq "SELECT * FROM '.' WHERE (extension == 'jpg' OR extension == 'png') AND size > 500000"
|
110
|
+
```
|
@@ -0,0 +1,94 @@
|
|
1
|
+
# File Query
|
2
|
+
|
3
|
+
A SQL-like interface for querying files in your filesystem.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
```bash
|
8
|
+
# Clone the repository
|
9
|
+
git clone https://github.com/nikdavis/file_query_text.git
|
10
|
+
cd file_query_text
|
11
|
+
|
12
|
+
# Install with pip
|
13
|
+
pip install -e .
|
14
|
+
|
15
|
+
# Or use UV
|
16
|
+
uv run python -m file_query_text.cli "your query"
|
17
|
+
|
18
|
+
# Install as a permanent tool with UV
|
19
|
+
uv tool install .
|
20
|
+
# This will install the 'fq' command
|
21
|
+
```
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
### Command Line
|
26
|
+
|
27
|
+
The quickest way to run file-query is with UV:
|
28
|
+
|
29
|
+
```bash
|
30
|
+
uv run python -m file_query_text.cli "your query here"
|
31
|
+
```
|
32
|
+
|
33
|
+
After installation, you can use the shorthand command:
|
34
|
+
|
35
|
+
```bash
|
36
|
+
fq "your query here"
|
37
|
+
```
|
38
|
+
|
39
|
+
#### Basic Usage
|
40
|
+
|
41
|
+
```bash
|
42
|
+
# Find all Python files
|
43
|
+
fq "extension == 'py'"
|
44
|
+
|
45
|
+
# Find all text files and show their content
|
46
|
+
fq "extension == 'txt'" --show-content
|
47
|
+
```
|
48
|
+
|
49
|
+
#### Advanced Queries
|
50
|
+
|
51
|
+
File Query supports full SQL-like syntax:
|
52
|
+
|
53
|
+
```bash
|
54
|
+
# Find all Python files in the src directory
|
55
|
+
fq "SELECT * FROM 'src' WHERE extension == 'py'"
|
56
|
+
|
57
|
+
# Find all files larger than 100KB
|
58
|
+
fq "SELECT * FROM '.' WHERE size > 102400"
|
59
|
+
|
60
|
+
# Complex conditions
|
61
|
+
fq "SELECT * FROM '.' WHERE (extension == 'pdf' AND size > 1000000) OR (extension == 'txt' AND NOT name == 'README.txt')"
|
62
|
+
```
|
63
|
+
|
64
|
+
## Query Syntax
|
65
|
+
|
66
|
+
File Query uses a SQL-like syntax:
|
67
|
+
|
68
|
+
```sql
|
69
|
+
SELECT * FROM 'directory_path' WHERE condition
|
70
|
+
```
|
71
|
+
|
72
|
+
### Available Attributes
|
73
|
+
|
74
|
+
- `extension`: File extension (without the dot)
|
75
|
+
- `name`: Filename with extension
|
76
|
+
- `size`: File size in bytes
|
77
|
+
|
78
|
+
### Operators
|
79
|
+
|
80
|
+
- Comparison: `==`, `!=`, `<`, `<=`, `>`, `>=`
|
81
|
+
- Logical: `AND`, `OR`, `NOT`
|
82
|
+
|
83
|
+
## Examples
|
84
|
+
|
85
|
+
```bash
|
86
|
+
# Find all PDF files
|
87
|
+
fq "extension == 'pdf'"
|
88
|
+
|
89
|
+
# Find all files not named "main.py"
|
90
|
+
fq "NOT name == 'main.py'"
|
91
|
+
|
92
|
+
# Find all large image files
|
93
|
+
fq "SELECT * FROM '.' WHERE (extension == 'jpg' OR extension == 'png') AND size > 500000"
|
94
|
+
```
|
@@ -0,0 +1,66 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
import os
|
3
|
+
import sys
|
4
|
+
import argparse
|
5
|
+
from pathlib import Path
|
6
|
+
# The distribution installs its code as the ``file_query_text`` package
# (see ``[tool.setuptools] packages`` and the ``fq`` entry point), so the
# query helpers must be imported from there for the installed CLI to start.
from file_query_text.main import parse_query, QueryVisitor, execute_query
|
8
|
+
|
9
|
+
def main():
    """Run the ``fq`` command line interface.

    The positional ``query`` argument may take one of four shapes:

    1. Full SQL format: ``SELECT * FROM 'path' WHERE condition``
    2. Simple condition: ``extension == 'py'`` (searched below the current
       working directory)
    3. Path only: ``SELECT * FROM 'path'``
    4. Empty / omitted: every file below the current working directory

    Matching paths are printed one per line; with ``--show-content`` the text
    of each matching file is printed as well.

    Returns:
        ``None`` when the query was executed (even if nothing matched), or
        ``1`` when the query could not be parsed, so that the console-script
        wrapper exits with a non-zero status instead of silently succeeding.
    """
    parser = argparse.ArgumentParser(description="SQL-like queries for your filesystem")
    parser.add_argument(
        "query",
        nargs="?",
        default="",
        help="SQL query for finding files (default: lists all files in current directory)",
    )
    parser.add_argument(
        "--show-content", "-c",
        action="store_true",
        help="Display content of the matching files",
    )
    args = parser.parse_args()

    # Bare conditions and empty queries are evaluated relative to the
    # directory the command was started from.
    # NOTE: the path is interpolated into a single-quoted literal; a working
    # directory whose name contains "'" cannot be expressed in the grammar.
    cwd = os.getcwd()
    stripped_query = args.query.strip()

    if not stripped_query:
        # Empty query - list all files in current directory
        query_str = f"SELECT * FROM '{cwd}'"
    elif stripped_query.upper().startswith("SELECT"):
        # Full SQL format or SELECT * FROM without WHERE - use as is
        query_str = args.query
    else:
        # Simple condition format - assume it's a WHERE condition
        query_str = f"SELECT * FROM '{cwd}' WHERE {args.query}"

    # parse_query() reports the syntax error itself and returns None.
    parsed = parse_query(query_str)
    if not parsed:
        return 1

    visitor = QueryVisitor()
    visitor.visit(parsed)
    results = execute_query(visitor.select, visitor.from_dirs, visitor.where)

    if not results:
        print("No matching files found.")
        return None

    print(f"Found {len(results)} matching files:")
    for file_path in results:
        print(file_path)

        if not args.show_content:
            continue

        # Unreadable or non-text files are reported and skipped rather than
        # aborting the listing; anything else is a real bug and propagates.
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                content = f.read()
        except (OSError, UnicodeDecodeError) as e:
            print(f"Error reading file: {e}")
            continue

        print("\n--- File Content ---")
        print(content)
        print("--- End Content ---\n")

    return None


if __name__ == "__main__":
    main()
|
@@ -0,0 +1,63 @@
|
|
1
|
+
from pyparsing import (
    CaselessKeyword,
    Word,
    alphas,
    alphanums,
    QuotedString,
    delimitedList,
    Optional,
    Group,
    Suppress,
    ZeroOrMore,
    oneOf,
    Forward,
    Literal,
    OneOrMore,
    infixNotation,
    opAssoc,
    c_style_comment,
    nums,
    pyparsing_common,
)
|
21
|
+
|
22
|
+
# Define keywords.
# CaselessKeyword matches the complete word only (``Word("SELECT")`` would
# accept any run of the letters S/E/L/C/T, and ``Literal("OR")`` would match
# the start of a longer word) and always yields the upper-case spelling given
# here, which is what the evaluator compares against ("AND", "OR", "NOT").
SELECT = Suppress(CaselessKeyword("SELECT"))
FROM = Suppress(CaselessKeyword("FROM"))
WHERE = Suppress(CaselessKeyword("WHERE"))
AND = CaselessKeyword("AND")
OR = CaselessKeyword("OR")
NOT = CaselessKeyword("NOT")

# Keywords may not be used as identifiers; without this guard the column list
# in ``SELECT name FROM ...`` would swallow ``FROM`` as another column name.
_RESERVED = SELECT | FROM | WHERE | AND | OR | NOT

# Define identifiers and literals
IDENTIFIER = ~_RESERVED + Word(alphas + "_")
STRING_LITERAL = QuotedString("'", unquoteResults=True)
# Use pyparsing_common for numeric literals (tokens are converted to int)
NUMERIC_LITERAL = pyparsing_common.integer
DIRECTORY_LIST = Group(delimitedList(STRING_LITERAL))

# Define comparison operators
COMPARISON_OP = oneOf("== != < <= > >=")
ATTRIBUTE = IDENTIFIER + Suppress("=") + STRING_LITERAL

# Define basic condition with support for both string and numeric literals
VALUE = STRING_LITERAL | NUMERIC_LITERAL
basic_condition = Group(IDENTIFIER + COMPARISON_OP + VALUE)

# Define logical expressions using infixNotation for better handling of AND and OR.
# Precedence, highest first: NOT, AND, OR. Parentheses are handled by infixNotation.
condition_expr = Forward()
condition_expr <<= infixNotation(
    basic_condition,
    [
        (NOT, 1, opAssoc.RIGHT),
        (AND, 2, opAssoc.LEFT),
        (OR, 2, opAssoc.LEFT),
    ],
)

# Define the full query structure:
#   SELECT (* | column...) FROM 'dir'[, 'dir'...] [WHERE condition]
query = (
    SELECT
    + (Literal("*") | Group(OneOrMore(IDENTIFIER))).setResultsName("select")
    + FROM
    + DIRECTORY_LIST.setResultsName("from_dirs")
    + Optional(WHERE + condition_expr.setResultsName("where"))
)
|
@@ -0,0 +1,110 @@
|
|
1
|
+
import os
|
2
|
+
import sys
|
3
|
+
from file_query_text.grammar import query # Import the fixed grammar
|
4
|
+
|
5
|
+
|
6
|
+
def parse_query(query_str):
    """Parse *query_str* with the module-level ``query`` grammar.

    Args:
        query_str: The complete query text, e.g.
            ``"SELECT * FROM 'src' WHERE extension == 'py'"``.

    Returns:
        The parse results produced by the grammar, or ``None`` when the text
        could not be parsed (the error is printed to stdout).
    """
    old_limit = sys.getrecursionlimit()
    try:
        # Deeply nested conditions recurse heavily inside the parser, so give
        # it more headroom - but never lower a limit that is already higher.
        sys.setrecursionlimit(max(old_limit, 2000))
        return query.parseString(query_str, parseAll=True)
    except Exception as e:
        print(f"Parse error: {e}")
        return None
    finally:
        # Restore the caller's limit on success *and* on failure.
        sys.setrecursionlimit(old_limit)
|
20
|
+
|
21
|
+
class QueryVisitor:
    """Collect the SELECT, FROM and WHERE parts of a parsed query.

    After :meth:`visit` the pieces are available as the ``select``,
    ``from_dirs`` and ``where`` attributes.
    """

    def __init__(self):
        # Defaults used until a parsed query has been visited.
        self.select, self.from_dirs, self.where = [], [], None

    def visit(self, parsed_query):
        """Copy the named results out of *parsed_query*.

        Missing parts fall back to ``["*"]`` for the select list, an empty
        list for the directories and ``None`` for the condition.
        """
        fetch = parsed_query.get
        self.select = fetch("select", ["*"])
        self.from_dirs = fetch("from_dirs", [])
        self.where = fetch("where", None)
|
31
|
+
|
32
|
+
def execute_query(select, from_dirs, where_conditions):
    """Return the paths of all files below *from_dirs* that satisfy the condition.

    Args:
        select: The select list from the query (currently not used to shape
            the result).
        from_dirs: Directories to search recursively; paths that do not exist
            are skipped.
        where_conditions: Parsed WHERE condition handed to
            ``evaluate_conditions`` for every candidate file.

    Returns:
        A list of matching file paths, in directory-walk order.
    """
    hits = []
    for base in from_dirs:
        if os.path.exists(base):
            hits.extend(
                candidate
                for parent, _subdirs, names in os.walk(base)
                for candidate in (os.path.join(parent, entry) for entry in names)
                if evaluate_conditions(candidate, where_conditions)
            )
    return hits
|
43
|
+
|
44
|
+
def _get_file_attr(file_path, attr_name):
    """Return attribute *attr_name* of *file_path*, or None if unknown/unavailable."""
    if attr_name == "extension":
        return os.path.splitext(file_path)[1][1:]
    if attr_name == "name":
        return os.path.basename(file_path)
    if attr_name == "size":
        try:
            return os.path.getsize(file_path)
        except OSError:
            # e.g. a dangling symlink or a file removed during the walk
            return None
    # Add more attributes as needed
    return None


def _compare(attr_val, op, val):
    """Apply comparison *op* between a file attribute and a query literal."""
    if op in ("==", "!="):
        # Compare textually so that ``size == 100`` (int literal) and
        # ``size == '100'`` (string literal) both work; an unknown attribute
        # never equals anything.
        equal = attr_val is not None and str(attr_val) == str(val)
        return equal if op == "==" else not equal

    # Ordering operators are numeric; operands that are missing or are not
    # numbers simply do not match instead of aborting the whole query.
    try:
        left, right = int(attr_val), int(val)
    except (TypeError, ValueError):
        return False

    if op == "<":
        return left < right
    if op == "<=":
        return left <= right
    if op == ">":
        return left > right
    if op == ">=":
        return left >= right
    return False


def _eval_expr(file_path, expr):
    """Recursively evaluate a nested-list condition tree for *file_path*."""
    if not isinstance(expr, list):
        return expr  # For simple terms like 'AND', 'OR'

    # A redundant grouping level: [[...]]
    if len(expr) == 1:
        return _eval_expr(file_path, expr[0])

    # NOT operation: ['NOT', expr]
    if len(expr) == 2 and expr[0] == "NOT":
        return not _eval_expr(file_path, expr[1])

    # Basic condition: [attr, op, value]
    if len(expr) == 3 and isinstance(expr[0], str) and isinstance(expr[1], str):
        literal = expr[2].strip("'") if isinstance(expr[2], str) else expr[2]
        return _compare(_get_file_attr(file_path, expr[0]), expr[1], literal)

    # Logical chain: [operand, op, operand, op, operand, ...]. The parser
    # emits a flat list for repeated operators (``a AND b AND c``), so fold
    # left to right instead of assuming exactly three elements.
    operators = expr[1::2]
    if len(expr) >= 3 and len(expr) % 2 == 1 and all(
        op in ("AND", "OR") for op in operators
    ):
        result = bool(_eval_expr(file_path, expr[0]))
        for op, operand in zip(operators, expr[2::2]):
            if op == "AND":
                result = result and bool(_eval_expr(file_path, operand))
            else:
                result = result or bool(_eval_expr(file_path, operand))
        return result

    return False


def evaluate_conditions(file_path, condition):
    """Decide whether *file_path* satisfies the parsed WHERE *condition*.

    Args:
        file_path: Path of the candidate file.
        condition: Parsed condition (an object with ``asList()``, or an
            already nested list). A falsy value means "no WHERE clause".

    Returns:
        ``True`` if there is no condition or the file matches it, otherwise
        ``False``.
    """
    if not condition:
        return True

    tree = condition.asList() if hasattr(condition, "asList") else condition
    return bool(_eval_expr(file_path, tree))
|
92
|
+
|
93
|
+
# Example usage
if __name__ == "__main__":
    # Demo: list the Python files in the project's "src" and "tests"
    # directories, which are located relative to this file.
    package_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.dirname(package_dir)
    src_dir, tests_dir = (
        os.path.join(project_root, sub) for sub in ("src", "tests")
    )
    query_str = f"SELECT * FROM '{src_dir}', '{tests_dir}' WHERE extension == 'py'"

    parsed = parse_query(query_str)
    if parsed:
        visitor = QueryVisitor()
        visitor.visit(parsed)
        matches = execute_query(visitor.select, visitor.from_dirs, visitor.where)
        print("Matching files:")
        # Paths that mention ".venv" are left out of the listing.
        for match in (m for m in matches if ".venv" not in m):
            print(match)
|
@@ -0,0 +1,110 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: file_query_text
|
3
|
+
Version: 0.1.0
|
4
|
+
Summary: SQL-like interface for querying files in your filesystem
|
5
|
+
Author-email: nik <42a11b@nikdav.is>
|
6
|
+
License-Expression: MIT
|
7
|
+
Project-URL: Homepage, https://github.com/nikdavis/file_query_text
|
8
|
+
Project-URL: Bug Tracker, https://github.com/nikdavis/file_query_text/issues
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
10
|
+
Classifier: Operating System :: OS Independent
|
11
|
+
Requires-Python: >=3.12
|
12
|
+
Description-Content-Type: text/markdown
|
13
|
+
Requires-Dist: pyparsing>=3.2.3
|
14
|
+
Provides-Extra: dev
|
15
|
+
Requires-Dist: pytest>=8.3.5; extra == "dev"
|
16
|
+
|
17
|
+
# File Query
|
18
|
+
|
19
|
+
A SQL-like interface for querying files in your filesystem.
|
20
|
+
|
21
|
+
## Installation
|
22
|
+
|
23
|
+
```bash
|
24
|
+
# Clone the repository
|
25
|
+
git clone https://github.com/yourusername/file-query.git
|
26
|
+
cd file-query
|
27
|
+
|
28
|
+
# Install with pip
|
29
|
+
pip install -e .
|
30
|
+
|
31
|
+
# Or use UV
|
32
|
+
uv run python -m src.cli "your query"
|
33
|
+
|
34
|
+
# Install as a permanent tool with UV
|
35
|
+
uv tool install .
|
36
|
+
# This will install the 'fq' command
|
37
|
+
```
|
38
|
+
|
39
|
+
## Usage
|
40
|
+
|
41
|
+
### Command Line
|
42
|
+
|
43
|
+
The quickest way to run file-query is with UV:
|
44
|
+
|
45
|
+
```bash
|
46
|
+
uv run python -m src.cli "your query here"
|
47
|
+
```
|
48
|
+
|
49
|
+
After installation, you can use the shorthand command:
|
50
|
+
|
51
|
+
```bash
|
52
|
+
fq "your query here"
|
53
|
+
```
|
54
|
+
|
55
|
+
#### Basic Usage
|
56
|
+
|
57
|
+
```bash
|
58
|
+
# Find all Python files
|
59
|
+
fq "extension == 'py'"
|
60
|
+
|
61
|
+
# Find all text files and show their content
|
62
|
+
fq "extension == 'txt'" --show-content
|
63
|
+
```
|
64
|
+
|
65
|
+
#### Advanced Queries
|
66
|
+
|
67
|
+
File Query supports full SQL-like syntax:
|
68
|
+
|
69
|
+
```bash
|
70
|
+
# Find all Python files in the src directory
|
71
|
+
fq "SELECT * FROM 'src' WHERE extension == 'py'"
|
72
|
+
|
73
|
+
# Find all files larger than 100KB
|
74
|
+
fq "SELECT * FROM '.' WHERE size > 102400"
|
75
|
+
|
76
|
+
# Complex conditions
|
77
|
+
fq "SELECT * FROM '.' WHERE (extension == 'pdf' AND size > 1000000) OR (extension == 'txt' AND NOT name == 'README.txt')"
|
78
|
+
```
|
79
|
+
|
80
|
+
## Query Syntax
|
81
|
+
|
82
|
+
File Query uses a SQL-like syntax:
|
83
|
+
|
84
|
+
```sql
|
85
|
+
SELECT * FROM 'directory_path' WHERE condition
|
86
|
+
```
|
87
|
+
|
88
|
+
### Available Attributes
|
89
|
+
|
90
|
+
- `extension`: File extension (without the dot)
|
91
|
+
- `name`: Filename with extension
|
92
|
+
- `size`: File size in bytes
|
93
|
+
|
94
|
+
### Operators
|
95
|
+
|
96
|
+
- Comparison: `==`, `!=`, `<`, `<=`, `>`, `>=`
|
97
|
+
- Logical: `AND`, `OR`, `NOT`
|
98
|
+
|
99
|
+
## Examples
|
100
|
+
|
101
|
+
```bash
|
102
|
+
# Find all PDF files
|
103
|
+
fq "extension == 'pdf'"
|
104
|
+
|
105
|
+
# Find all files not named "main.py"
|
106
|
+
fq "NOT name == 'main.py'"
|
107
|
+
|
108
|
+
# Find all large image files
|
109
|
+
fq "SELECT * FROM '.' WHERE (extension == 'jpg' OR extension == 'png') AND size > 500000"
|
110
|
+
```
|
@@ -0,0 +1,13 @@
|
|
1
|
+
README.md
|
2
|
+
pyproject.toml
|
3
|
+
file_query_text/__init__.py
|
4
|
+
file_query_text/cli.py
|
5
|
+
file_query_text/grammar.py
|
6
|
+
file_query_text/main.py
|
7
|
+
file_query_text.egg-info/PKG-INFO
|
8
|
+
file_query_text.egg-info/SOURCES.txt
|
9
|
+
file_query_text.egg-info/dependency_links.txt
|
10
|
+
file_query_text.egg-info/entry_points.txt
|
11
|
+
file_query_text.egg-info/requires.txt
|
12
|
+
file_query_text.egg-info/top_level.txt
|
13
|
+
tests/test_main.py
|
@@ -0,0 +1 @@
|
|
1
|
+
|
@@ -0,0 +1 @@
|
|
1
|
+
file_query_text
|
@@ -0,0 +1,42 @@
|
|
1
|
+
[build-system]
|
2
|
+
requires = ["setuptools>=61.0"]
|
3
|
+
build-backend = "setuptools.build_meta"
|
4
|
+
|
5
|
+
[project]
|
6
|
+
name = "file_query_text"
|
7
|
+
version = "0.1.0"
|
8
|
+
description = "SQL-like interface for querying files in your filesystem"
|
9
|
+
readme = "README.md"
|
10
|
+
requires-python = ">=3.12"
|
11
|
+
license = "MIT"
|
12
|
+
authors = [
|
13
|
+
{name = "nik", email = "42a11b@nikdav.is"}
|
14
|
+
]
|
15
|
+
classifiers = [
|
16
|
+
"Programming Language :: Python :: 3",
|
17
|
+
"Operating System :: OS Independent",
|
18
|
+
]
|
19
|
+
dependencies = [
|
20
|
+
"pyparsing>=3.2.3",
|
21
|
+
]
|
22
|
+
|
23
|
+
[project.urls]
|
24
|
+
"Homepage" = "https://github.com/nikdavis/file_query_text"
|
25
|
+
"Bug Tracker" = "https://github.com/nikdavis/file_query_text/issues"
|
26
|
+
|
27
|
+
[project.scripts]
|
28
|
+
fq = "file_query_text.cli:main"
|
29
|
+
|
30
|
+
[tool.setuptools]
|
31
|
+
packages = ["file_query_text"]
|
32
|
+
|
33
|
+
[project.optional-dependencies]
|
34
|
+
dev = [
|
35
|
+
"pytest>=8.3.5",
|
36
|
+
]
|
37
|
+
|
38
|
+
[[tool.uv.index]]
|
39
|
+
name = "testpypi"
|
40
|
+
url = "https://test.pypi.org/simple/"
|
41
|
+
publish-url = "https://test.pypi.org/legacy/"
|
42
|
+
explicit = true
|
@@ -0,0 +1,376 @@
|
|
1
|
+
import os
|
2
|
+
import pytest
|
3
|
+
import tempfile
|
4
|
+
from pathlib import Path
|
5
|
+
from file_query_text.main import parse_query, execute_query, QueryVisitor
|
6
|
+
|
7
|
+
@pytest.fixture
def temp_dir():
    """Yield a temporary directory pre-populated with sample files.

    Layout created for every test::

        docs/report.pdf
        docs/note.txt
        downloads/image.jpg

    Yields:
        pathlib.Path: Root of the temporary tree. The tree is deleted as soon
        as the test that requested the fixture finishes.
    """
    sample_files = {
        "docs/report.pdf": "Test PDF",
        "docs/note.txt": "Test TXT",
        "downloads/image.jpg": "Test JPG",
    }

    # The context manager removes the directory deterministically on teardown
    # instead of leaving cleanup to garbage collection of the
    # TemporaryDirectory object.
    with tempfile.TemporaryDirectory() as tmp:
        root_path = Path(tmp)

        for relative, text in sample_files.items():
            target = root_path / relative
            target.parent.mkdir(exist_ok=True)
            target.write_text(text)

        yield root_path  # Provide the path to the test
|
28
|
+
|
29
|
+
def test_basic_query(temp_dir):
    """A WHERE filter on extension over two FROM directories finds only the PDF."""
    parsed = parse_query(f"""
    SELECT *
    FROM '{temp_dir}/docs', '{temp_dir}/downloads'
    WHERE extension == 'pdf'
    """)

    visitor = QueryVisitor()
    visitor.visit(parsed)
    results = execute_query(visitor.select, visitor.from_dirs, visitor.where)

    # Only the PDF below docs/ is expected; compare as sorted string lists.
    wanted = sorted([str(temp_dir / "docs/report.pdf")])
    assert sorted(str(found) for found in results) == wanted
|
53
|
+
|
54
|
+
def test_multiple_conditions(temp_dir):
    """Test OR conditions across every directory below the temp root."""
    query_str = f"""
    SELECT *
    FROM '{temp_dir}'
    WHERE extension == 'pdf' OR extension == 'txt'
    """

    parsed = parse_query(query_str)
    visitor = QueryVisitor()
    visitor.visit(parsed)

    results = execute_query(
        visitor.select,
        visitor.from_dirs,
        visitor.where
    )

    # The fixture provides one PDF and one TXT (both in docs/); the JPG in
    # downloads/ matches neither side of the OR and must be excluded.
    expected = [
        str(temp_dir / "docs/report.pdf"),
        str(temp_dir / "docs/note.txt"),
    ]
    actual = [str(p) for p in results]
    assert sorted(actual) == sorted(expected)
|
74
|
+
|
75
|
+
def test_nonexistent_directory():
    """A FROM directory that does not exist produces an empty result."""
    visitor = QueryVisitor()
    visitor.visit(parse_query("""
    SELECT * FROM '/nonexistent/path'
    WHERE extension == 'pdf'
    """))

    matches = execute_query(visitor.select, visitor.from_dirs, visitor.where)
    assert len(matches) == 0
|
87
|
+
|
88
|
+
# Optional: Test AND / NOT conditions
|
89
|
+
def test_combined_conditions(temp_dir):
    """Test AND and NOT conditions used together."""

    def run(where_clause):
        """Run *where_clause* against downloads/ and return sorted string paths."""
        parsed = parse_query(f"""
        SELECT *
        FROM '{temp_dir}/downloads'
        WHERE {where_clause}
        """)
        visitor = QueryVisitor()
        visitor.visit(parsed)
        results = execute_query(
            visitor.select,
            visitor.from_dirs,
            visitor.where
        )
        return sorted(str(p) for p in results)

    # downloads/ contains exactly one file: image.jpg
    image = str(temp_dir / "downloads/image.jpg")

    # The NOT excludes a name that is not present, so the JPG still matches.
    assert run("extension == 'jpg' AND NOT name == 'missing.jpg'") == [image]

    # The NOT excludes the only JPG, so nothing is left.
    assert run("extension == 'jpg' AND NOT name == 'image.jpg'") == []

    # We don't have any png files, whatever the NOT side says.
    assert run("extension == 'png' AND NOT name == 'image.jpg'") == []
|
109
|
+
|
110
|
+
def test_and_conditions(temp_dir):
    """Both sides of an AND must hold for a file to be returned."""
    # Extra PDF that only this test needs.
    target = temp_dir / "docs/report_2023.pdf"
    with open(target, "w") as handle:
        handle.write("Test PDF with year")

    parsed = parse_query(f"""
    SELECT *
    FROM '{temp_dir}/docs'
    WHERE extension == 'pdf' AND name == 'report_2023.pdf'
    """)
    visitor = QueryVisitor()
    visitor.visit(parsed)
    results = execute_query(visitor.select, visitor.from_dirs, visitor.where)

    # report.pdf has the right extension but the wrong name, so only the new
    # file may appear.
    wanted = sorted([str(temp_dir / "docs/report_2023.pdf")])
    assert sorted(str(found) for found in results) == wanted
|
138
|
+
|
139
|
+
def test_or_conditions(temp_dir):
    """A file matching either side of an OR is returned."""
    docs = temp_dir / "docs"

    # Extra files that only this test needs.
    for filename, text in (
        ("report_2023.pdf", "Test PDF with year"),
        ("presentation.ppt", "Test PPT"),
    ):
        with open(docs / filename, "w") as handle:
            handle.write(text)

    parsed = parse_query(f"""
    SELECT *
    FROM '{temp_dir}/docs'
    WHERE extension == 'pdf' OR extension == 'ppt'
    """)
    visitor = QueryVisitor()
    visitor.visit(parsed)
    results = execute_query(visitor.select, visitor.from_dirs, visitor.where)

    # Every PDF and PPT directly inside docs/ is expected.
    wanted = [str(p) for p in list(docs.glob("*.pdf")) + list(docs.glob("*.ppt"))]
    assert sorted(str(found) for found in results) == sorted(wanted)
|
172
|
+
|
173
|
+
def test_not_conditions(temp_dir):
    """NOT inverts a condition: everything except PDFs is returned."""
    docs = temp_dir / "docs"

    # Files for this test (report.pdf is rewritten with the same content).
    for filename, text in (
        ("report.pdf", "Test PDF"),
        ("presentation.ppt", "Test PPT"),
        ("document.txt", "Test TXT"),
    ):
        with open(docs / filename, "w") as handle:
            handle.write(text)

    parsed = parse_query(f"""
    SELECT *
    FROM '{temp_dir}/docs'
    WHERE NOT extension == 'pdf'
    """)
    visitor = QueryVisitor()
    visitor.visit(parsed)
    results = execute_query(visitor.select, visitor.from_dirs, visitor.where)

    # Reference answer: every entry of docs/ whose suffix is not ".pdf".
    wanted = [str(entry) for entry in docs.glob("*") if entry.suffix != ".pdf"]
    assert sorted(str(found) for found in results) == sorted(wanted)
|
208
|
+
|
209
|
+
def test_numeric_comparison(temp_dir):
    """The ``>`` operator compares the file size numerically."""
    docs = temp_dir / "docs"

    # Three files of increasing size: 5 bytes, > 10 bytes, > 100 bytes.
    for filename, text in (
        ("small.txt", "Small"),
        ("medium.txt", "Medium text" * 5),
        ("large.txt", "Large text file" * 20),
    ):
        with open(docs / filename, "w") as handle:
            handle.write(text)

    # Query: Find files larger than 100 bytes
    parsed = parse_query(f"SELECT * FROM '{temp_dir}/docs' WHERE size > 100")
    visitor = QueryVisitor()
    visitor.visit(parsed)
    results = execute_query(visitor.select, visitor.from_dirs, visitor.where)

    # Reference answer computed straight from the filesystem.
    wanted = [str(entry) for entry in docs.glob("*") if entry.stat().st_size > 100]
    assert sorted(str(found) for found in results) == sorted(wanted)
|
241
|
+
|
242
|
+
def test_complex_nested_conditions(temp_dir):
    """Parenthesised AND/OR/NOT combinations are evaluated correctly."""
    docs = temp_dir / "docs"

    # Small and large variants of PDF and TXT files, plus one JPG.
    for filename, text in (
        ("small_report.pdf", "Small PDF"),
        ("large_report.pdf", "Large PDF file" * 20),
        ("small_note.txt", "Small TXT"),
        ("large_note.txt", "Large TXT file" * 20),
        ("image.jpg", "Image file" * 5),
    ):
        with open(docs / filename, "w") as handle:
            handle.write(text)

    # Complex query: Find (PDF files that are large) OR (TXT files that are not small)
    parsed = parse_query(f"""
    SELECT *
    FROM '{temp_dir}/docs'
    WHERE (extension == 'pdf' AND size > 100) OR (extension == 'txt' AND NOT size < 50)
    """)
    visitor = QueryVisitor()
    visitor.visit(parsed)
    results = execute_query(visitor.select, visitor.from_dirs, visitor.where)

    def should_match(entry):
        """Mirror the WHERE clause in plain Python."""
        ext = entry.suffix[1:]  # Remove the dot
        size = entry.stat().st_size
        return (ext == 'pdf' and size > 100) or (ext == 'txt' and size >= 50)

    wanted = [str(entry) for entry in docs.glob("*") if should_match(entry)]
    assert sorted(str(found) for found in results) == sorted(wanted)
|
284
|
+
|
285
|
+
def test_query_without_where_clause(temp_dir):
    """Without a WHERE clause every file of the directory is returned."""
    parsed = parse_query(f"""
    SELECT *
    FROM '{temp_dir}/docs'
    """)
    visitor = QueryVisitor()
    visitor.visit(parsed)
    results = execute_query(visitor.select, visitor.from_dirs, visitor.where)

    # Reference answer: all regular files directly inside docs/.
    wanted = [
        str(entry) for entry in (temp_dir / "docs").glob("*") if entry.is_file()
    ]
    assert sorted(str(found) for found in results) == sorted(wanted)
|
311
|
+
|
312
|
+
def test_empty_query(temp_dir):
    """The query built for an empty CLI argument returns every file recursively."""
    # One more file so docs/ holds several entries.
    with open(temp_dir / "docs/extra_file.txt", "w") as handle:
        handle.write("Extra test file")

    # Same text the CLI generates for an empty query, rooted at temp_dir.
    parsed = parse_query(f"SELECT * FROM '{temp_dir}'")
    visitor = QueryVisitor()
    visitor.visit(parsed)
    results = execute_query(visitor.select, visitor.from_dirs, visitor.where)

    # Reference answer: all regular files in all subdirectories.
    wanted = [str(entry) for entry in temp_dir.glob("**/*") if entry.is_file()]
    actual = [str(found) for found in results]
    assert sorted(actual) == sorted(wanted)

    # More than one file type must be present in the result.
    extensions = {os.path.splitext(found)[1] for found in actual}
    assert len(extensions) > 1, "Empty query should return files with different extensions"
|
344
|
+
|
345
|
+
def test_no_argument_query(temp_dir):
    """The query built when no CLI argument is given returns every file recursively."""
    # One more file so docs/ holds several entries.
    with open(temp_dir / "docs/extra_file2.txt", "w") as handle:
        handle.write("Another test file")

    # A missing argument is treated like an empty string, so the query text
    # is the same "list everything" query, rooted at temp_dir here.
    parsed = parse_query(f"SELECT * FROM '{temp_dir}'")
    visitor = QueryVisitor()
    visitor.visit(parsed)
    results = execute_query(visitor.select, visitor.from_dirs, visitor.where)

    # Reference answer: all regular files in all subdirectories.
    wanted = [str(entry) for entry in temp_dir.glob("**/*") if entry.is_file()]
    actual = [str(found) for found in results]
    assert sorted(actual) == sorted(wanted)

    # More than one file type must be present in the result.
    extensions = {os.path.splitext(found)[1] for found in actual}
    assert len(extensions) > 1, "Query with no argument should return files with different extensions"
|