file_query_text 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,4 +2,4 @@
2
2
  SQL-like interface for querying files in your filesystem.
3
3
  """
4
4
 
5
- __version__ = "0.1.6"
5
+ __version__ = "0.1.8"
@@ -0,0 +1,224 @@
1
+ import collections
2
+ import os
3
+ import re
4
+
5
+ from os.path import abspath, dirname, join
6
+ from pathlib import Path
7
+ from typing import Reversible, Union
8
+
9
def handle_negation(file_path, rules: Reversible["IgnoreRule"]):
    """Decide whether ``file_path`` is ignored when negation rules are present.

    The rules are consulted from last to first, so the most recently declared
    rule that matches the path decides the outcome.

    Args:
        file_path: Path handed to each rule's ``match`` method.
        rules: Reversible collection of rule objects exposing ``match`` and
            ``negation``.

    Returns:
        ``True`` when the last matching rule is a plain (non-negated) rule,
        ``False`` when it is a negation or when no rule matches at all.
    """
    # Lazily walk the rules backwards and stop at the first one that matches.
    deciding_rule = next(
        (candidate for candidate in reversed(rules)
         if candidate.match(file_path)),
        None,
    )
    if deciding_rule is None:
        return False
    return not deciding_rule.negation
14
+
15
def parse_gitignore(full_path, base_dir=None):
    """Build a matcher from the ignore file located at ``full_path``.

    Args:
        full_path: Path of the ignore file to read.
        base_dir: Directory the patterns are relative to. When ``None`` the
            directory containing ``full_path`` is used.

    Returns:
        Whatever ``_parse_gitignore_lines`` returns for the file's lines
        (a callable taking a file path and returning a bool).
    """
    effective_base = dirname(full_path) if base_dir is None else base_dir
    with open(full_path) as handle:
        # The open file is iterated line by line by the parser.
        return _parse_gitignore_lines(handle, full_path, effective_base)
20
+
21
def parse_gitignore_str(gitignore_str, base_dir):
    """Build a matcher from ignore-file content supplied as a string.

    Args:
        gitignore_str: Text of the ignore file, one pattern per line.
        base_dir: Directory the patterns are relative to.

    Returns:
        Whatever ``_parse_gitignore_lines`` returns for the split lines
        (a callable taking a file path and returning a bool).
    """
    # There is no real file; report rules as coming from "<base_dir>/.gitignore".
    reported_source = join(base_dir, '.gitignore')
    return _parse_gitignore_lines(
        gitignore_str.splitlines(), reported_source, base_dir)
25
+
26
def _parse_gitignore_lines(lines, full_path, base_dir):
    """Turn an iterable of ignore-file lines into a path-matching callable.

    Args:
        lines: Iterable of pattern lines (a trailing newline is tolerated).
        full_path: Name recorded as the source file of every rule.
        base_dir: Directory the patterns are relative to; it is normalised
            before being attached to the rules.

    Returns:
        A function ``file_path -> bool`` telling whether the path is ignored.
    """
    base_dir = _normalize_path(base_dir)
    candidates = (
        rule_from_pattern(
            text.rstrip('\n'), base_path=base_dir, source=(full_path, number))
        for number, text in enumerate(lines, start=1)
    )
    # Comments and blank lines yield no rule and are dropped here.
    rules = [rule for rule in candidates if rule]

    if any(rule.negation for rule in rules):
        # With negations a simple "any" is wrong: later rules override
        # earlier ones, so defer to the ordered evaluation.
        return lambda file_path: handle_negation(file_path, rules)
    return lambda file_path: any(rule.match(file_path) for rule in rules)
40
+
41
def rule_from_pattern(pattern, base_path=None, source=None):
    """Convert one .gitignore pattern line into an ``IgnoreRule``.

    Takes a match pattern such as ``"*.py[cod]"`` or ``"**/*.bak"`` and
    returns a rule suitable for matching against files and directories.

    Because git allows nested .gitignore files, a ``base_path`` is required
    for correct behaviour; it should be absolute.

    Args:
        pattern: The raw pattern line (without trailing newline).
        base_path: Directory the pattern is relative to.
        source: ``(file, line_number)`` tuple kept on the rule for reporting.

    Returns:
        An ``IgnoreRule``, or ``None`` for lines that cannot match anything:
        blank lines, comments, a bare ``/``, a bare ``!``, and patterns made
        only of slashes.
    """
    # Store the exact pattern for the rule's repr and string functions.
    orig_pattern = pattern

    # Discard comments and separators.
    if pattern.strip() == '' or pattern.startswith('#'):
        return None

    # Strip the leading bang before examining double asterisks.
    negation = pattern.startswith('!')
    if negation:
        pattern = pattern[1:]
        if not pattern:
            # A lone "!" negates nothing; indexing it further would fail.
            return None

    # Multi-asterisks not surrounded by slashes (or at the start/end) should
    # be treated like single asterisks.
    pattern = re.sub(r'([^/])\*{2,}', r'\1*', pattern)
    pattern = re.sub(r'\*{2,}([^/])', r'*\1', pattern)

    # Special-casing '/', which doesn't match any files or directories.
    if pattern.rstrip() == '/':
        return None

    directory_only = pattern.endswith('/')
    # A slash (other than a trailing one) ties the rule to its base_path.
    anchored = '/' in pattern[:-1]
    if pattern.startswith('/'):
        pattern = pattern[1:]
    had_leading_globstar = pattern.startswith('**')
    if had_leading_globstar:
        # A leading "**" matches at any depth, so the rule is not anchored.
        pattern = pattern[2:]
        anchored = False
    if pattern.startswith('/'):
        pattern = pattern[1:]
    if pattern.endswith('/'):
        pattern = pattern[:-1]

    if not pattern:
        if not had_leading_globstar:
            # Only slashes were left (e.g. "//"): nothing can match.
            return None
        # "**", "/**" and "**/" reduce to nothing once the globstar is
        # stripped; treat them as "*" at any depth instead of crashing.
        pattern = '*'

    # Patterns with leading hashes or exclamation marks are escaped with a
    # backslash in front; unescape it.
    if pattern[:2] in ('\\#', '\\!'):
        pattern = pattern[1:]

    # Trailing spaces are ignored unless they are escaped with a backslash.
    # NOTE(review): the loop stops at index 1, so a trailing space at index 0
    # or 1 is kept as-is -- confirm whether that is intended.
    i = len(pattern) - 1
    striptrailingspaces = True
    while i > 1 and pattern[i] == ' ':
        if pattern[i - 1] == '\\':
            # Drop the escaping backslash and keep the space.
            pattern = pattern[:i - 1] + pattern[i:]
            i -= 1
            striptrailingspaces = False
        elif striptrailingspaces:
            pattern = pattern[:i]
        i -= 1

    regex = fnmatch_pathname_to_regex(
        pattern, directory_only, negation, anchored=bool(anchored)
    )
    return IgnoreRule(
        pattern=orig_pattern,
        regex=regex,
        negation=negation,
        directory_only=directory_only,
        anchored=anchored,
        base_path=base_path if base_path else None,
        source=source
    )
112
+
113
+
114
IGNORE_RULE_FIELDS = [
    'pattern', 'regex',  # Basic values
    'negation', 'directory_only', 'anchored',  # Behavior flags
    'base_path',  # Meaningful for gitignore-style behavior
    'source'  # (file, line) tuple for reporting
]


class IgnoreRule(collections.namedtuple('IgnoreRule_', IGNORE_RULE_FIELDS)):
    """A single parsed ignore pattern together with its compiled-to-regex form.

    The fields are those listed in ``IGNORE_RULE_FIELDS``.
    """

    def __str__(self):
        """Return the pattern exactly as it was written."""
        return self.pattern

    def __repr__(self):
        """Return ``IgnoreRule('<pattern>')``."""
        return f"IgnoreRule('{self.pattern}')"

    def match(self, abs_path: Union[str, Path]):
        """Return ``True`` when ``abs_path`` is matched by this rule.

        The path is normalised and, when the rule has a ``base_path``, made
        relative to it before the rule's regex is searched in it.

        Raises:
            ValueError: from ``Path.relative_to`` when the rule has a
                ``base_path`` and ``abs_path`` does not lie under it.
        """
        if self.base_path:
            rel_path = str(_normalize_path(abs_path).relative_to(self.base_path))
        else:
            rel_path = str(_normalize_path(abs_path))
        # Path() strips the trailing slash, so we need to preserve it
        # in case of directory-only negation. ``endswith`` is safe on an
        # empty string, and ``isinstance`` also accepts str subclasses.
        if self.negation and isinstance(abs_path, str) and abs_path.endswith('/'):
            rel_path += '/'
        if rel_path.startswith('./'):
            rel_path = rel_path[2:]
        return bool(re.search(self.regex, rel_path))
144
+
145
+
146
# Frustratingly, python's fnmatch doesn't provide the FNM_PATHNAME
# option that .gitignore's behavior depends on.
def fnmatch_pathname_to_regex(
    pattern, directory_only: bool, negation: bool, anchored: bool = False
):
    """Translate a gitignore glob into a regular-expression string.

    Implements fnmatch-style behavior, as though with FNM_PATHNAME flagged:
    the path separator is not matched by the shell-style ``*`` and ``?``
    wildcards.

    Args:
        pattern: Glob with leading/trailing slashes already stripped.
        directory_only: Whether the original pattern ended with a slash.
        negation: Whether the original pattern started with ``!``.
        anchored: When true the regex must match from the start of the
            path; otherwise it may start after any separator.

    Returns:
        The regex source as a ``str`` (not compiled).
    """
    i, n = 0, len(pattern)

    seps = [re.escape(os.sep)]
    if os.altsep is not None:
        seps.append(re.escape(os.altsep))
    # Inside a character class the members are simply concatenated; joining
    # them with '|' would make a literal '|' count as a separator.
    sep_chars = ''.join(seps)
    seps_group = '[' + sep_chars + ']'
    nonsep = '[^' + sep_chars + ']'

    res = []
    while i < n:
        c = pattern[i]
        i += 1
        if c == '*':
            if i < n and pattern[i] == '*':
                i += 1
                if i < n and pattern[i] == '/':
                    # "**/" matches zero or more whole directories.
                    i += 1
                    res.append('(.*' + seps_group + ')?')
                else:
                    res.append('.*')
            else:
                res.append(nonsep + '*')
        elif c == '?':
            res.append(nonsep)
        elif c == '/':
            res.append(seps_group)
        elif c == '[':
            # Locate the closing bracket; a leading '!' and a ']' placed
            # first are part of the set rather than its terminator.
            j = i
            if j < n and pattern[j] == '!':
                j += 1
            if j < n and pattern[j] == ']':
                j += 1
            while j < n and pattern[j] != ']':
                j += 1
            if j >= n:
                # No closing bracket: treat '[' as a literal character.
                res.append('\\[')
            else:
                # '/' is dropped from the set so a bracket expression can
                # never name the separator.
                stuff = pattern[i:j].replace('\\', '\\\\').replace('/', '')
                i = j + 1
                if stuff.startswith('!'):
                    excluded = stuff[1:]
                    if excluded:
                        res.append('[^' + excluded + ']')
                    else:
                        # e.g. "[!/]": "[^]" would be an invalid regex;
                        # the set means "any non-separator character".
                        res.append(nonsep)
                elif not stuff:
                    # e.g. "[/]": nothing is left to match, so emit a
                    # construct that never matches instead of crashing.
                    res.append('(?!)')
                else:
                    if stuff.startswith('^'):
                        # A literal '^' must not negate the regex class.
                        stuff = '\\' + stuff
                    res.append('[' + stuff + ']')
        else:
            res.append(re.escape(c))
    if anchored:
        res.insert(0, '^')
    else:
        res.insert(0, f"(^|{seps_group})")
    if not directory_only:
        res.append('($|\\/.*$)')
    elif directory_only and negation:
        res.append('/$')
    else:
        res.append('($|\\/)')
    return ''.join(res)
215
+
216
+
217
def _normalize_path(path: Union[str, Path]) -> Path:
    """Return ``path`` as an absolute, simplified ``Path`` without following symlinks.

    This is equivalent to `Path.resolve()` except that it does not resolve
    symlinks. Like `Path.resolve()`, it simplifies the path by removing
    double slashes, `..`, `.` and so on.
    """
    absolute_text = abspath(path)
    return Path(absolute_text)
@@ -27,6 +27,7 @@ WHERE = Suppress(CaselessKeyword("WHERE"))
27
27
  AND = CaselessKeyword("AND")
28
28
  OR = CaselessKeyword("OR")
29
29
  NOT = CaselessKeyword("NOT")
30
+ LIKE = CaselessKeyword("LIKE")
30
31
 
31
32
  # Define identifiers and literals
32
33
  IDENTIFIER = Word(alphas + "_")
@@ -36,7 +37,7 @@ NUMERIC_LITERAL = pyparsing_common.integer
36
37
  DIRECTORY_LIST = Group(delimitedList(STRING_LITERAL))
37
38
 
38
39
  # Define comparison operators
39
- COMPARISON_OP = oneOf("= == != <> < <= > >=")
40
+ COMPARISON_OP = oneOf("= == != <> < <= > >=") | LIKE
40
41
  ATTRIBUTE = IDENTIFIER + Suppress("=") + STRING_LITERAL
41
42
 
42
43
  # Define basic condition with support for both string and numeric literals
file_query_text/main.py CHANGED
@@ -1,8 +1,9 @@
1
1
  import os
2
2
  import sys
3
3
  from file_query_text.grammar import query # Import the fixed grammar
4
- import gitignore_parser
4
+ import file_query_text.gitignore_parser as gitignore_parser
5
5
  import os.path
6
+ import re
6
7
 
7
8
 
8
9
  def parse_query(query_str):
@@ -128,6 +129,15 @@ def evaluate_conditions(file_path, condition):
128
129
  if op == "<=": return attr_val is not None and int(attr_val) <= int(val)
129
130
  if op == ">": return attr_val is not None and int(attr_val) > int(val)
130
131
  if op == ">=": return attr_val is not None and int(attr_val) >= int(val)
132
+ if op.upper() == "LIKE":
133
+ if attr_val is None:
134
+ return False
135
+ # Convert SQL LIKE pattern (with % wildcards) to regex pattern
136
+ # Escape any regex special characters in the pattern except %
137
+ pattern = re.escape(val).replace('\\%', '%') # Unescape % after escaping everything else
138
+ pattern = pattern.replace("%", ".*")
139
+ pattern = f"^{pattern}$" # Anchor pattern to match whole string
140
+ return bool(re.search(pattern, str(attr_val), re.IGNORECASE))
131
141
 
132
142
  # 2. Logical operations from infixNotation: [left, op, right]
133
143
  elif expr[1] == "AND":
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: file_query_text
3
- Version: 0.1.6
3
+ Version: 0.1.8
4
4
  Summary: SQL-like interface for querying files in your filesystem
5
5
  Author-email: nik <42a11b@nikdav.is>
6
6
  License-Expression: MIT
@@ -11,11 +11,10 @@ Classifier: Operating System :: OS Independent
11
11
  Requires-Python: >=3.12
12
12
  Description-Content-Type: text/markdown
13
13
  Requires-Dist: pyparsing>=3.2.3
14
- Requires-Dist: gitignore-parser>=0.1.12
15
14
  Provides-Extra: dev
16
15
  Requires-Dist: pytest>=8.3.5; extra == "dev"
17
16
 
18
- # File Query
17
+ # File Query (fq)
19
18
 
20
19
  A SQL-like interface for querying files in your filesystem.
21
20
 
@@ -112,4 +111,20 @@ fq "SELECT * FROM '.' WHERE (extension == 'jpg' OR extension == 'png') AND size
112
111
 
113
112
  # Find files with 'config' in their path
114
113
  fq "path == '.*config.*'"
114
+
115
+ ### Using wildcards with the LIKE operator
116
+
117
+ Find all Python files with "test" in their name:
118
+ ```
119
+ fq "name LIKE '%test%.py'"
120
+ ```
121
+
122
+ Find all files with a specific prefix:
123
+ ```
124
+ fq "name LIKE 'config%'"
125
+ ```
126
+
127
+ Find all markdown files in a specific year's folder:
128
+ ```
129
+ fq "path LIKE '%/2023/%' AND extension == 'md'"
115
130
  ```
@@ -0,0 +1,10 @@
1
+ file_query_text/__init__.py,sha256=-84ne-3HrCOzvhTwsN8M7-xOwSv24H1wen5OvnEo6_s,89
2
+ file_query_text/cli.py,sha256=eijCT1pHk4wtBhhmFHyeTOoLNz0zwk7Bm4izRLrjZb4,3709
3
+ file_query_text/gitignore_parser.py,sha256=PZk5MKyW5e8NXCyfC8w3r2JIp1HeF5CpZ5MdXOWlPNM,7734
4
+ file_query_text/grammar.py,sha256=mnyOd4UPg489Z-HQcxEySI41H02iDZRBbpaL3-mOFMU,1693
5
+ file_query_text/main.py,sha256=YswvqX-4yLGZ_qSTFBUjFxG7OAoLyB9FQ2PIVY-CpoU,7688
6
+ file_query_text-0.1.8.dist-info/METADATA,sha256=EPLhtU8ysQfuEo9pZGv7hVRa8YOeiCgSZIEgi7q6Shk,2753
7
+ file_query_text-0.1.8.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
8
+ file_query_text-0.1.8.dist-info/entry_points.txt,sha256=rNFYWzvcIsUZkGNsc_E_B5HyYRnqqdj_u8_IeQpw1wo,48
9
+ file_query_text-0.1.8.dist-info/top_level.txt,sha256=o1FzSvLa6kSV61b7RLHWRhEezc96m05YwIKqjuWUSxU,16
10
+ file_query_text-0.1.8.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- file_query_text/__init__.py,sha256=kJ33So_6rzvtBPQOQZaG0-_cw6mbIzuaRqzw8lY2AbI,89
2
- file_query_text/cli.py,sha256=eijCT1pHk4wtBhhmFHyeTOoLNz0zwk7Bm4izRLrjZb4,3709
3
- file_query_text/grammar.py,sha256=XanSi9VCKuIQNlOMXRBZbeWxaJj2UhMJPDIYKgw4lEQ,1655
4
- file_query_text/main.py,sha256=YUdNijSm_iIcb2rkdKtDsE0Jl2tVbGi0J3G-bSO5_J0,7024
5
- file_query_text-0.1.6.dist-info/METADATA,sha256=CUk07ZjACLMQCO_EdQ076ChtqtifqdITEZFtIOYM_sY,2480
6
- file_query_text-0.1.6.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
7
- file_query_text-0.1.6.dist-info/entry_points.txt,sha256=rNFYWzvcIsUZkGNsc_E_B5HyYRnqqdj_u8_IeQpw1wo,48
8
- file_query_text-0.1.6.dist-info/top_level.txt,sha256=o1FzSvLa6kSV61b7RLHWRhEezc96m05YwIKqjuWUSxU,16
9
- file_query_text-0.1.6.dist-info/RECORD,,