file_query_text 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,110 @@
1
+ Metadata-Version: 2.4
2
+ Name: file_query_text
3
+ Version: 0.1.0
4
+ Summary: SQL-like interface for querying files in your filesystem
5
+ Author-email: nik <42a11b@nikdav.is>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/nikdavis/file_query_text
8
+ Project-URL: Bug Tracker, https://github.com/nikdavis/file_query_text/issues
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.12
12
+ Description-Content-Type: text/markdown
13
+ Requires-Dist: pyparsing>=3.2.3
14
+ Provides-Extra: dev
15
+ Requires-Dist: pytest>=8.3.5; extra == "dev"
16
+
17
+ # File Query
18
+
19
+ A SQL-like interface for querying files in your filesystem.
20
+
21
+ ## Installation
22
+
23
+ ```bash
24
+ # Clone the repository
25
+ git clone https://github.com/nikdavis/file_query_text.git
26
+ cd file_query_text
27
+
28
+ # Install with pip
29
+ pip install -e .
30
+
31
+ # Or use UV
32
+ uv run python -m file_query_text.cli "your query"
33
+
34
+ # Install as a permanent tool with UV
35
+ uv tool install .
36
+ # This will install the 'fq' command
37
+ ```
38
+
39
+ ## Usage
40
+
41
+ ### Command Line
42
+
43
+ The quickest way to run file-query is with UV:
44
+
45
+ ```bash
46
+ uv run python -m file_query_text.cli "your query here"
47
+ ```
48
+
49
+ After installation, you can use the shorthand command:
50
+
51
+ ```bash
52
+ fq "your query here"
53
+ ```
54
+
55
+ #### Basic Usage
56
+
57
+ ```bash
58
+ # Find all Python files
59
+ fq "extension == 'py'"
60
+
61
+ # Find all text files and show their content
62
+ fq "extension == 'txt'" --show-content
63
+ ```
64
+
65
+ #### Advanced Queries
66
+
67
+ File Query supports full SQL-like syntax:
68
+
69
+ ```bash
70
+ # Find all Python files in the src directory
71
+ fq "SELECT * FROM 'src' WHERE extension == 'py'"
72
+
73
+ # Find all files larger than 100KB
74
+ fq "SELECT * FROM '.' WHERE size > 102400"
75
+
76
+ # Complex conditions
77
+ fq "SELECT * FROM '.' WHERE (extension == 'pdf' AND size > 1000000) OR (extension == 'txt' AND NOT name == 'README.txt')"
78
+ ```
79
+
80
+ ## Query Syntax
81
+
82
+ File Query uses a SQL-like syntax:
83
+
84
+ ```sql
85
+ SELECT * FROM 'directory_path' WHERE condition
86
+ ```
87
+
88
+ ### Available Attributes
89
+
90
+ - `extension`: File extension (without the dot)
91
+ - `name`: Filename with extension
92
+ - `size`: File size in bytes
93
+
94
+ ### Operators
95
+
96
+ - Comparison: `==`, `!=`, `<`, `<=`, `>`, `>=`
97
+ - Logical: `AND`, `OR`, `NOT`
98
+
99
+ ## Examples
100
+
101
+ ```bash
102
+ # Find all PDF files
103
+ fq "extension == 'pdf'"
104
+
105
+ # Find all files not named "main.py"
106
+ fq "NOT name == 'main.py'"
107
+
108
+ # Find all large image files
109
+ fq "SELECT * FROM '.' WHERE (extension == 'jpg' OR extension == 'png') AND size > 500000"
110
+ ```
@@ -0,0 +1,94 @@
1
+ # File Query
2
+
3
+ A SQL-like interface for querying files in your filesystem.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ # Clone the repository
9
+ git clone https://github.com/nikdavis/file_query_text.git
10
+ cd file_query_text
11
+
12
+ # Install with pip
13
+ pip install -e .
14
+
15
+ # Or use UV
16
+ uv run python -m file_query_text.cli "your query"
17
+
18
+ # Install as a permanent tool with UV
19
+ uv tool install .
20
+ # This will install the 'fq' command
21
+ ```
22
+
23
+ ## Usage
24
+
25
+ ### Command Line
26
+
27
+ The quickest way to run file-query is with UV:
28
+
29
+ ```bash
30
+ uv run python -m file_query_text.cli "your query here"
31
+ ```
32
+
33
+ After installation, you can use the shorthand command:
34
+
35
+ ```bash
36
+ fq "your query here"
37
+ ```
38
+
39
+ #### Basic Usage
40
+
41
+ ```bash
42
+ # Find all Python files
43
+ fq "extension == 'py'"
44
+
45
+ # Find all text files and show their content
46
+ fq "extension == 'txt'" --show-content
47
+ ```
48
+
49
+ #### Advanced Queries
50
+
51
+ File Query supports full SQL-like syntax:
52
+
53
+ ```bash
54
+ # Find all Python files in the src directory
55
+ fq "SELECT * FROM 'src' WHERE extension == 'py'"
56
+
57
+ # Find all files larger than 100KB
58
+ fq "SELECT * FROM '.' WHERE size > 102400"
59
+
60
+ # Complex conditions
61
+ fq "SELECT * FROM '.' WHERE (extension == 'pdf' AND size > 1000000) OR (extension == 'txt' AND NOT name == 'README.txt')"
62
+ ```
63
+
64
+ ## Query Syntax
65
+
66
+ File Query uses a SQL-like syntax:
67
+
68
+ ```sql
69
+ SELECT * FROM 'directory_path' WHERE condition
70
+ ```
71
+
72
+ ### Available Attributes
73
+
74
+ - `extension`: File extension (without the dot)
75
+ - `name`: Filename with extension
76
+ - `size`: File size in bytes
77
+
78
+ ### Operators
79
+
80
+ - Comparison: `==`, `!=`, `<`, `<=`, `>`, `>=`
81
+ - Logical: `AND`, `OR`, `NOT`
82
+
83
+ ## Examples
84
+
85
+ ```bash
86
+ # Find all PDF files
87
+ fq "extension == 'pdf'"
88
+
89
+ # Find all files not named "main.py"
90
+ fq "NOT name == 'main.py'"
91
+
92
+ # Find all large image files
93
+ fq "SELECT * FROM '.' WHERE (extension == 'jpg' OR extension == 'png') AND size > 500000"
94
+ ```
@@ -0,0 +1,3 @@
1
+ """SQL-like interface for querying files in your filesystem."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,66 @@
1
#!/usr/bin/env python3
"""Command-line entry point (``fq``) for running SQL-like file queries."""
import os
import sys
import argparse
from pathlib import Path
# The distribution ships its modules in the ``file_query_text`` package;
# importing from ``file_query`` raises ModuleNotFoundError once installed.
from file_query_text.main import parse_query, QueryVisitor, execute_query


def _build_query(raw_query, cwd):
    """Expand the command-line argument into a full ``SELECT`` statement.

    Handles the supported input shapes:
      1. Empty query: list every file below ``cwd``.
      2. Text starting with ``SELECT`` (any case): used unchanged.
      3. Anything else: treated as a bare WHERE condition on ``cwd``.
    """
    stripped = raw_query.strip()
    if not stripped:
        return f"SELECT * FROM '{cwd}'"
    if stripped.upper().startswith("SELECT"):
        return raw_query
    return f"SELECT * FROM '{cwd}' WHERE {raw_query}"


def _print_content(file_path):
    """Print the text content of ``file_path``, or an error line if unreadable."""
    try:
        with open(file_path, 'r') as f:
            content = f.read()
    except (OSError, UnicodeDecodeError) as e:
        # Unreadable or binary files must not abort the listing.
        print(f"Error reading file: {e}")
        return
    print("\n--- File Content ---")
    print(content)
    print("--- End Content ---\n")


def main():
    """Parse the command line, run the query and print the matching paths.

    Returns:
        0 when the query was parsed and executed (with or without matches),
        1 when ``parse_query`` could not parse it (the parse error has
        already been printed by ``parse_query``).
    """
    parser = argparse.ArgumentParser(description="SQL-like queries for your filesystem")
    parser.add_argument("query", nargs="?", default="",
                        help="SQL query for finding files (default: lists all files in current directory)")
    parser.add_argument(
        "--show-content", "-c",
        action="store_true",
        help="Display content of the matching files"
    )
    args = parser.parse_args()

    # Bare conditions and empty queries are resolved against the working directory.
    query_str = _build_query(args.query, os.getcwd())

    parsed = parse_query(query_str)
    if not parsed:
        return 1

    visitor = QueryVisitor()
    visitor.visit(parsed)
    results = execute_query(visitor.select, visitor.from_dirs, visitor.where)

    if not results:
        print("No matching files found.")
        return 0

    print(f"Found {len(results)} matching files:")
    for file_path in results:
        print(file_path)
        if args.show_content:
            _print_content(file_path)
    return 0


if __name__ == "__main__":
    sys.exit(main())
@@ -0,0 +1,63 @@
1
"""pyparsing grammar for the SQL-like file query language.

The public entry point is ``query``; a successful parse exposes the named
results ``select``, ``from_dirs`` and (optionally) ``where``.
"""
from pyparsing import (
    Word,
    alphas,
    alphanums,
    QuotedString,
    delimitedList,
    Optional,
    Group,
    Suppress,
    ZeroOrMore,
    oneOf,
    Forward,
    Literal,
    OneOrMore,
    infixNotation,
    opAssoc,
    c_style_comment,
    nums,
    pyparsing_common,
    CaselessKeyword,
)

# Define keywords.
# ``Word("SELECT")`` would match any run of the letters S/E/L/C/T and
# ``Literal("AND")`` would match the prefix of a longer word, so whole-word
# keywords are used instead. They are caseless so that lower-case input is
# accepted; the emitted token is always the upper-case spelling given here.
SELECT = Suppress(CaselessKeyword("SELECT"))
FROM = Suppress(CaselessKeyword("FROM"))
WHERE = Suppress(CaselessKeyword("WHERE"))
AND = CaselessKeyword("AND")
OR = CaselessKeyword("OR")
NOT = CaselessKeyword("NOT")

# Define identifiers and literals.
# Reserved words are excluded from identifiers; otherwise a column list such
# as ``SELECT name FROM ...`` would consume ``FROM`` as another column.
_RESERVED = SELECT | FROM | WHERE | AND | OR | NOT
IDENTIFIER = ~_RESERVED + Word(alphas + "_")
STRING_LITERAL = QuotedString("'", unquoteResults=True)
# Use pyparsing_common for numeric literals (tokens are converted to int)
NUMERIC_LITERAL = pyparsing_common.integer
DIRECTORY_LIST = Group(delimitedList(STRING_LITERAL))

# Define comparison operators
COMPARISON_OP = oneOf("== != < <= > >=")
ATTRIBUTE = IDENTIFIER + Suppress("=") + STRING_LITERAL

# Define basic condition with support for both string and numeric literals
VALUE = STRING_LITERAL | NUMERIC_LITERAL
basic_condition = Group(IDENTIFIER + COMPARISON_OP + VALUE)

# Define logical expressions using infixNotation; operators listed first
# bind tighter, so NOT binds tighter than AND, which binds tighter than OR.
condition_expr = Forward()
condition_expr <<= infixNotation(
    basic_condition,
    [
        (NOT, 1, opAssoc.RIGHT),
        (AND, 2, opAssoc.LEFT),
        (OR, 2, opAssoc.LEFT),
    ],
)

# Define the full query structure: SELECT <cols> FROM <dirs> [WHERE <condition>]
query = (
    SELECT
    + (Literal("*") | Group(OneOrMore(IDENTIFIER))).setResultsName("select")
    + FROM
    + DIRECTORY_LIST.setResultsName("from_dirs")
    + Optional(WHERE + condition_expr.setResultsName("where"))
)
@@ -0,0 +1,110 @@
1
+ import os
2
+ import sys
3
+ from file_query_text.grammar import query # Import the fixed grammar
4
+
5
+
6
def parse_query(query_str):
    """Parse ``query_str`` with the module-level ``query`` grammar.

    The interpreter recursion limit is raised to at least 2000 while
    parsing so that deeply nested conditions do not overflow the stack,
    and is always restored afterwards, including when parsing fails.

    Args:
        query_str: The complete query text.

    Returns:
        The parse result on success, or ``None`` after printing a
        ``Parse error: ...`` line when the text cannot be parsed.
    """
    old_limit = sys.getrecursionlimit()
    # Never lower a limit that the host application has already raised.
    sys.setrecursionlimit(max(old_limit, 2000))
    try:
        return query.parseString(query_str, parseAll=True)
    except Exception as e:
        # Boundary handler: callers rely on None (not an exception) for bad input.
        print(f"Parse error: {e}")
        return None
    finally:
        sys.setrecursionlimit(old_limit)
20
+
21
class QueryVisitor:
    """Holds the SELECT, FROM and WHERE parts extracted from a parsed query."""

    def __init__(self):
        # Defaults describe a visitor that has not seen a query yet.
        self.select, self.from_dirs, self.where = [], [], None

    def visit(self, parsed_query):
        """Copy the named parts of ``parsed_query`` onto this visitor.

        ``parsed_query`` must offer a mapping-style ``get(name, default)``.
        Missing parts fall back to ``["*"]`` for ``select``, ``[]`` for
        ``from_dirs`` and ``None`` for ``where``.
        """
        lookup = parsed_query.get
        self.select = lookup("select", ["*"])
        self.from_dirs = lookup("from_dirs", [])
        self.where = lookup("where", None)
31
+
32
def execute_query(select, from_dirs, where_conditions):
    """Walk every directory in ``from_dirs`` and collect the matching files.

    Directories that do not exist are skipped. Each file found by
    ``os.walk`` is kept when ``evaluate_conditions`` accepts it for
    ``where_conditions``. ``select`` is accepted but not used here.

    Returns:
        A list of matching file paths in walk order.
    """
    hits = []
    for base in from_dirs:
        if os.path.exists(base):
            for parent, _subdirs, names in os.walk(base):
                candidates = (os.path.join(parent, entry) for entry in names)
                hits.extend(
                    path for path in candidates
                    if evaluate_conditions(path, where_conditions)
                )
    return hits
43
+
44
def evaluate_conditions(file_path, condition):
    """Return whether ``file_path`` satisfies the parsed WHERE ``condition``.

    Args:
        file_path: Path of the file being tested.
        condition: Parsed condition exposing ``asList()``, or an already
            nested plain list. A falsy value means "no WHERE clause".

    Returns:
        True when there is no condition or the file matches it, else False.

    The nested list uses three shapes:
      * ``[attr, op, value]``: a basic comparison,
      * ``["NOT", expr]``: negation,
      * ``[expr, "AND"/"OR", expr, ...]``: a chain of one logical operator.
    """
    if not condition:
        return True

    def get_file_attr(attr_name):
        """Return the value of a supported attribute, or None if unavailable."""
        if attr_name == "extension":
            return os.path.splitext(file_path)[1][1:]
        if attr_name == "name":
            return os.path.basename(file_path)
        if attr_name == "size":
            try:
                return os.path.getsize(file_path)
            except OSError:
                # Broken symlink or file removed mid-walk: treat as unknown
                # instead of aborting the whole query.
                return None
        # Add more attributes as needed
        return None

    def compare(attr_val, op, val):
        """Apply one comparison operator between a file attribute and a literal."""
        if op in ("==", "!="):
            if isinstance(attr_val, int) and isinstance(val, int):
                # Numeric literal against a numeric attribute (e.g. size == 100).
                equal = attr_val == val
            else:
                equal = str(attr_val) == str(val)
            return equal if op == "==" else not equal

        if op not in ("<", "<=", ">", ">="):
            return False
        if attr_val is None:
            return False
        try:
            left, right = int(attr_val), int(val)
        except (TypeError, ValueError):
            # Non-numeric operands: order two strings lexicographically,
            # anything else simply does not match.
            if not isinstance(val, str):
                return False
            left, right = str(attr_val), val
        if op == "<":
            return left < right
        if op == "<=":
            return left <= right
        if op == ">":
            return left > right
        return left >= right

    def eval_expr(expr):
        """Recursively evaluate one node of the nested condition list."""
        if not isinstance(expr, list):
            return expr  # For simple terms like 'AND', 'OR'

        # NOT operation: ['NOT', expr]
        if len(expr) == 2 and expr[0] == "NOT":
            return not eval_expr(expr[1])

        # Basic condition: [attr, op, value]
        if len(expr) == 3 and isinstance(expr[0], str) and isinstance(expr[1], str):
            raw = expr[2]
            val = raw.strip("'") if isinstance(raw, str) else raw  # Remove quotes if string
            return compare(get_file_attr(expr[0]), expr[1], val)

        # Logical chain: [operand, op, operand, op, operand, ...].
        # Repeated left-associative operators arrive as one flat list, so
        # every (op, operand) pair is folded in, not just the first one.
        if len(expr) >= 3 and len(expr) % 2 == 1:
            result = eval_expr(expr[0])
            for op, operand in zip(expr[1::2], expr[2::2]):
                if op == "AND":
                    result = result and eval_expr(operand)
                elif op == "OR":
                    result = result or eval_expr(operand)
                else:
                    return False
            return result

        return False

    nested = condition.asList() if hasattr(condition, "asList") else condition
    return eval_expr(nested)
92
+
93
# Demonstration run: query the Python files of two sibling directories.
if __name__ == "__main__":
    # Both directories are resolved relative to the parent of this file's folder.
    current_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.dirname(current_dir)
    src_dir, tests_dir = (
        os.path.join(project_root, sub) for sub in ("src", "tests")
    )
    query_str = f"SELECT * FROM '{src_dir}', '{tests_dir}' WHERE extension == 'py'"
    parsed = parse_query(query_str)
    if parsed:
        visitor = QueryVisitor()
        visitor.visit(parsed)
        results = execute_query(visitor.select, visitor.from_dirs, visitor.where)
        print("Matching files:")
        # Paths containing ".venv" are left out of the listing.
        for match in (path for path in results if ".venv" not in path):
            print(match)
@@ -0,0 +1,110 @@
1
+ Metadata-Version: 2.4
2
+ Name: file_query_text
3
+ Version: 0.1.0
4
+ Summary: SQL-like interface for querying files in your filesystem
5
+ Author-email: nik <42a11b@nikdav.is>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/nikdavis/file_query_text
8
+ Project-URL: Bug Tracker, https://github.com/nikdavis/file_query_text/issues
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.12
12
+ Description-Content-Type: text/markdown
13
+ Requires-Dist: pyparsing>=3.2.3
14
+ Provides-Extra: dev
15
+ Requires-Dist: pytest>=8.3.5; extra == "dev"
16
+
17
+ # File Query
18
+
19
+ A SQL-like interface for querying files in your filesystem.
20
+
21
+ ## Installation
22
+
23
+ ```bash
24
+ # Clone the repository
25
+ git clone https://github.com/nikdavis/file_query_text.git
26
+ cd file_query_text
27
+
28
+ # Install with pip
29
+ pip install -e .
30
+
31
+ # Or use UV
32
+ uv run python -m file_query_text.cli "your query"
33
+
34
+ # Install as a permanent tool with UV
35
+ uv tool install .
36
+ # This will install the 'fq' command
37
+ ```
38
+
39
+ ## Usage
40
+
41
+ ### Command Line
42
+
43
+ The quickest way to run file-query is with UV:
44
+
45
+ ```bash
46
+ uv run python -m file_query_text.cli "your query here"
47
+ ```
48
+
49
+ After installation, you can use the shorthand command:
50
+
51
+ ```bash
52
+ fq "your query here"
53
+ ```
54
+
55
+ #### Basic Usage
56
+
57
+ ```bash
58
+ # Find all Python files
59
+ fq "extension == 'py'"
60
+
61
+ # Find all text files and show their content
62
+ fq "extension == 'txt'" --show-content
63
+ ```
64
+
65
+ #### Advanced Queries
66
+
67
+ File Query supports full SQL-like syntax:
68
+
69
+ ```bash
70
+ # Find all Python files in the src directory
71
+ fq "SELECT * FROM 'src' WHERE extension == 'py'"
72
+
73
+ # Find all files larger than 100KB
74
+ fq "SELECT * FROM '.' WHERE size > 102400"
75
+
76
+ # Complex conditions
77
+ fq "SELECT * FROM '.' WHERE (extension == 'pdf' AND size > 1000000) OR (extension == 'txt' AND NOT name == 'README.txt')"
78
+ ```
79
+
80
+ ## Query Syntax
81
+
82
+ File Query uses a SQL-like syntax:
83
+
84
+ ```sql
85
+ SELECT * FROM 'directory_path' WHERE condition
86
+ ```
87
+
88
+ ### Available Attributes
89
+
90
+ - `extension`: File extension (without the dot)
91
+ - `name`: Filename with extension
92
+ - `size`: File size in bytes
93
+
94
+ ### Operators
95
+
96
+ - Comparison: `==`, `!=`, `<`, `<=`, `>`, `>=`
97
+ - Logical: `AND`, `OR`, `NOT`
98
+
99
+ ## Examples
100
+
101
+ ```bash
102
+ # Find all PDF files
103
+ fq "extension == 'pdf'"
104
+
105
+ # Find all files not named "main.py"
106
+ fq "NOT name == 'main.py'"
107
+
108
+ # Find all large image files
109
+ fq "SELECT * FROM '.' WHERE (extension == 'jpg' OR extension == 'png') AND size > 500000"
110
+ ```
@@ -0,0 +1,13 @@
1
+ README.md
2
+ pyproject.toml
3
+ file_query_text/__init__.py
4
+ file_query_text/cli.py
5
+ file_query_text/grammar.py
6
+ file_query_text/main.py
7
+ file_query_text.egg-info/PKG-INFO
8
+ file_query_text.egg-info/SOURCES.txt
9
+ file_query_text.egg-info/dependency_links.txt
10
+ file_query_text.egg-info/entry_points.txt
11
+ file_query_text.egg-info/requires.txt
12
+ file_query_text.egg-info/top_level.txt
13
+ tests/test_main.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ fq = file_query_text.cli:main
@@ -0,0 +1,4 @@
1
+ pyparsing>=3.2.3
2
+
3
+ [dev]
4
+ pytest>=8.3.5
@@ -0,0 +1 @@
1
+ file_query_text
@@ -0,0 +1,42 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "file_query_text"
7
+ version = "0.1.0"
8
+ description = "SQL-like interface for querying files in your filesystem"
9
+ readme = "README.md"
10
+ requires-python = ">=3.12"
11
+ license = "MIT"
12
+ authors = [
13
+ {name = "nik", email = "42a11b@nikdav.is"}
14
+ ]
15
+ classifiers = [
16
+ "Programming Language :: Python :: 3",
17
+ "Operating System :: OS Independent",
18
+ ]
19
+ dependencies = [
20
+ "pyparsing>=3.2.3",
21
+ ]
22
+
23
+ [project.urls]
24
+ "Homepage" = "https://github.com/nikdavis/file_query_text"
25
+ "Bug Tracker" = "https://github.com/nikdavis/file_query_text/issues"
26
+
27
+ [project.scripts]
28
+ fq = "file_query_text.cli:main"
29
+
30
+ [tool.setuptools]
31
+ packages = ["file_query_text"]
32
+
33
+ [project.optional-dependencies]
34
+ dev = [
35
+ "pytest>=8.3.5",
36
+ ]
37
+
38
+ [[tool.uv.index]]
39
+ name = "testpypi"
40
+ url = "https://test.pypi.org/simple/"
41
+ publish-url = "https://test.pypi.org/legacy/"
42
+ explicit = true
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,376 @@
1
+ import os
2
+ import pytest
3
+ import tempfile
4
+ from pathlib import Path
5
+ from file_query_text.main import parse_query, execute_query, QueryVisitor
6
+
7
@pytest.fixture
def temp_dir():
    """Yield a fresh temporary directory populated with sample files.

    Layout created for every test:
        docs/report.pdf, docs/note.txt, downloads/image.jpg

    The directory is removed deterministically when the test finishes,
    because the ``with`` block is exited after the ``yield``.
    """
    with tempfile.TemporaryDirectory() as tmp_name:
        root_path = Path(tmp_name)

        # Create test directories
        (root_path / "docs").mkdir()
        (root_path / "downloads").mkdir()

        # Create test files
        with open(root_path / "docs/report.pdf", "w") as f:
            f.write("Test PDF")
        with open(root_path / "docs/note.txt", "w") as f:
            f.write("Test TXT")
        with open(root_path / "downloads/image.jpg", "w") as f:
            f.write("Test JPG")

        yield root_path  # Provide the path to the test
28
+
29
def test_basic_query(temp_dir):
    """An extension filter over two directories should return only the PDF."""
    query_str = f"""
    SELECT *
    FROM '{temp_dir}/docs', '{temp_dir}/downloads'
    WHERE extension == 'pdf'
    """

    visitor = QueryVisitor()
    visitor.visit(parse_query(query_str))
    found = execute_query(visitor.select, visitor.from_dirs, visitor.where)

    # Only docs/report.pdf carries the pdf extension in the fixture.
    wanted = [str(temp_dir / "docs/report.pdf")]

    # Compare as sorted strings so result order does not matter.
    assert sorted(map(str, found)) == sorted(wanted)
53
+
54
def test_multiple_conditions(temp_dir):
    """Test OR conditions across every sub-directory of the root."""
    query_str = f"""
    SELECT *
    FROM '{temp_dir}'
    WHERE extension == 'pdf' OR extension == 'jpg'
    """

    parsed = parse_query(query_str)
    visitor = QueryVisitor()
    visitor.visit(parsed)

    results = execute_query(
        visitor.select,
        visitor.from_dirs,
        visitor.where
    )

    # Either branch of the OR may match: one PDF in docs, one JPG in downloads.
    expected = [
        str(temp_dir / "docs/report.pdf"),
        str(temp_dir / "downloads/image.jpg"),
    ]

    # Normalize paths for comparison
    actual = [str(p) for p in results]
    assert sorted(actual) == sorted(expected)
74
+
75
def test_nonexistent_directory():
    """A FROM directory that does not exist should yield no results."""
    query_str = """
    SELECT * FROM '/nonexistent/path'
    WHERE extension == 'pdf'
    """
    visitor = QueryVisitor()
    visitor.visit(parse_query(query_str))

    found = execute_query(visitor.select, visitor.from_dirs, visitor.where)
    assert len(found) == 0
87
+
88
+ # Optional: Test AND / NOT conditions
89
def test_combined_conditions(temp_dir):
    """Test AND and NOT conditions."""

    def run(where):
        """Run ``where`` against the downloads directory and return str paths."""
        query_str = f"""
        SELECT *
        FROM '{temp_dir}/downloads'
        WHERE {where}
        """
        visitor = QueryVisitor()
        visitor.visit(parse_query(query_str))
        results = execute_query(
            visitor.select,
            visitor.from_dirs,
            visitor.where
        )
        return [str(p) for p in results]

    image = str(temp_dir / "downloads/image.jpg")

    # NOT excludes the only JPG, so the AND has nothing left to match.
    assert run("extension == 'jpg' AND NOT name == 'image.jpg'") == []

    # NOT on a different name leaves the JPG in place.
    assert run("extension == 'jpg' AND NOT name == 'other.jpg'") == [image]

    # We don't have any png files
    assert len(run("extension == 'png'")) == 0
109
+
110
def test_and_conditions(temp_dir):
    """Both sides of an AND must hold for a file to be returned."""
    # Add a second PDF so the name condition actually narrows the result.
    target = temp_dir / "docs/report_2023.pdf"
    target.write_text("Test PDF with year")

    query_str = f"""
    SELECT *
    FROM '{temp_dir}/docs'
    WHERE extension == 'pdf' AND name == 'report_2023.pdf'
    """

    visitor = QueryVisitor()
    visitor.visit(parse_query(query_str))
    found = execute_query(visitor.select, visitor.from_dirs, visitor.where)

    # Only the newly written PDF satisfies both conditions.
    wanted = [str(temp_dir / "docs/report_2023.pdf")]
    assert sorted(map(str, found)) == sorted(wanted)
138
+
139
def test_or_conditions(temp_dir):
    """A file matching either side of an OR should be returned."""
    docs = temp_dir / "docs"

    # Extra files: one more PDF and a PPT for the second OR branch.
    (temp_dir / "docs/report_2023.pdf").write_text("Test PDF with year")
    (temp_dir / "docs/presentation.ppt").write_text("Test PPT")

    query_str = f"""
    SELECT *
    FROM '{temp_dir}/docs'
    WHERE extension == 'pdf' OR extension == 'ppt'
    """

    visitor = QueryVisitor()
    visitor.visit(parse_query(query_str))
    found = execute_query(visitor.select, visitor.from_dirs, visitor.where)

    # Expected: every *.pdf and every *.ppt directly inside docs.
    wanted = [str(p) for pattern in ("*.pdf", "*.ppt") for p in docs.glob(pattern)]

    assert sorted(map(str, found)) == sorted(wanted)
172
+
173
def test_not_conditions(temp_dir):
    """NOT should invert a condition and return every non-matching file."""
    # (Re)write a PDF plus two non-PDF files in docs.
    for rel_path, text in (
        ("docs/report.pdf", "Test PDF"),
        ("docs/presentation.ppt", "Test PPT"),
        ("docs/document.txt", "Test TXT"),
    ):
        (temp_dir / rel_path).write_text(text)

    query_str = f"""
    SELECT *
    FROM '{temp_dir}/docs'
    WHERE NOT extension == 'pdf'
    """

    visitor = QueryVisitor()
    visitor.visit(parse_query(query_str))
    found = execute_query(visitor.select, visitor.from_dirs, visitor.where)

    # Everything in docs whose suffix is not .pdf is expected back.
    wanted = [
        str(entry)
        for entry in (temp_dir / "docs").glob("*")
        if entry.suffix != ".pdf"
    ]

    assert sorted(map(str, found)) == sorted(wanted)
208
+
209
def test_numeric_comparison(temp_dir):
    """A numeric ``size >`` filter should return only files above the threshold."""
    # Three text files of increasing size: 5 bytes, 55 bytes, 300 bytes.
    (temp_dir / "docs/small.txt").write_text("Small")
    (temp_dir / "docs/medium.txt").write_text("Medium text" * 5)
    (temp_dir / "docs/large.txt").write_text("Large text file" * 20)

    # Query: Find files larger than 100 bytes
    query_str = f"SELECT * FROM '{temp_dir}/docs' WHERE size > 100"

    visitor = QueryVisitor()
    visitor.visit(parse_query(query_str))
    found = execute_query(visitor.select, visitor.from_dirs, visitor.where)

    # Recompute the expectation straight from the filesystem.
    wanted = [
        str(entry)
        for entry in (temp_dir / "docs").glob("*")
        if entry.stat().st_size > 100
    ]

    assert sorted(map(str, found)) == sorted(wanted)
241
+
242
def test_complex_nested_conditions(temp_dir):
    """Parenthesised AND/OR/NOT combinations should evaluate as written."""
    # Files covering each combination of extension and size class.
    samples = (
        ("docs/small_report.pdf", "Small PDF"),            # Small PDF file
        ("docs/large_report.pdf", "Large PDF file" * 20),  # Large PDF file
        ("docs/small_note.txt", "Small TXT"),              # Small TXT file
        ("docs/large_note.txt", "Large TXT file" * 20),    # Large TXT file
        ("docs/image.jpg", "Image file" * 5),              # JPG file
    )
    for rel_path, text in samples:
        (temp_dir / rel_path).write_text(text)

    # Complex query: Find (PDF files that are large) OR (TXT files that are not small)
    query_str = f"""
    SELECT *
    FROM '{temp_dir}/docs'
    WHERE (extension == 'pdf' AND size > 100) OR (extension == 'txt' AND NOT size < 50)
    """

    visitor = QueryVisitor()
    visitor.visit(parse_query(query_str))
    found = execute_query(visitor.select, visitor.from_dirs, visitor.where)

    def should_match(entry):
        """Mirror the WHERE clause using pathlib metadata."""
        ext = entry.suffix[1:]  # Remove the dot
        size = entry.stat().st_size
        return (ext == 'pdf' and size > 100) or (ext == 'txt' and size >= 50)

    wanted = [str(entry) for entry in (temp_dir / "docs").glob("*") if should_match(entry)]

    assert sorted(map(str, found)) == sorted(wanted)
284
+
285
def test_query_without_where_clause(temp_dir):
    """Omitting WHERE should return every file in the FROM directory."""
    query_str = f"""
    SELECT *
    FROM '{temp_dir}/docs'
    """

    visitor = QueryVisitor()
    visitor.visit(parse_query(query_str))
    found = execute_query(visitor.select, visitor.from_dirs, visitor.where)

    # Every regular file directly inside docs is expected.
    wanted = [
        str(entry)
        for entry in (temp_dir / "docs").glob("*")
        if entry.is_file()
    ]

    assert sorted(map(str, found)) == sorted(wanted)
311
+
312
def test_empty_query(temp_dir):
    """The query built for an empty CLI argument should list all files."""
    # One extra file on top of the fixture's layout.
    (temp_dir / "docs/extra_file.txt").write_text("Extra test file")

    # Same statement shape the CLI constructs when given an empty query.
    query_str = f"SELECT * FROM '{temp_dir}'"

    visitor = QueryVisitor()
    visitor.visit(parse_query(query_str))
    found = execute_query(visitor.select, visitor.from_dirs, visitor.where)

    # Every file at any depth below the root is expected.
    wanted = [str(entry) for entry in temp_dir.glob("**/*") if entry.is_file()]

    actual = [str(p) for p in found]
    assert sorted(actual) == sorted(wanted)

    # The result must span more than one file type.
    extensions = set()
    for path in actual:
        extensions.add(os.path.splitext(path)[1])
    assert len(extensions) > 1, "Empty query should return files with different extensions"
344
+
345
def test_no_argument_query(temp_dir):
    """The statement used when no query argument is given should list all files.

    This test builds that statement directly; it does not invoke the CLI.
    """
    # One extra file on top of the fixture's layout.
    (temp_dir / "docs/extra_file2.txt").write_text("Another test file")

    # Statement shape used for an empty/missing query argument.
    query_str = f"SELECT * FROM '{temp_dir}'"

    visitor = QueryVisitor()
    visitor.visit(parse_query(query_str))
    found = execute_query(visitor.select, visitor.from_dirs, visitor.where)

    # Every file at any depth below the root is expected.
    wanted = [str(entry) for entry in temp_dir.glob("**/*") if entry.is_file()]

    actual = [str(p) for p in found]
    assert sorted(actual) == sorted(wanted)

    # The result must span more than one file type.
    extensions = set()
    for path in actual:
        extensions.add(os.path.splitext(path)[1])
    assert len(extensions) > 1, "Query with no argument should return files with different extensions"