file_query_text 0.1.6__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- file_query_text/__init__.py +1 -1
- file_query_text/gitignore_parser.py +224 -0
- file_query_text/main.py +1 -1
- {file_query_text-0.1.6.dist-info → file_query_text-0.1.7.dist-info}/METADATA +1 -2
- file_query_text-0.1.7.dist-info/RECORD +10 -0
- file_query_text-0.1.6.dist-info/RECORD +0 -9
- {file_query_text-0.1.6.dist-info → file_query_text-0.1.7.dist-info}/WHEEL +0 -0
- {file_query_text-0.1.6.dist-info → file_query_text-0.1.7.dist-info}/entry_points.txt +0 -0
- {file_query_text-0.1.6.dist-info → file_query_text-0.1.7.dist-info}/top_level.txt +0 -0
file_query_text/__init__.py
CHANGED
@@ -0,0 +1,224 @@
|
|
1
|
+
import collections
|
2
|
+
import os
|
3
|
+
import re
|
4
|
+
|
5
|
+
from os.path import abspath, dirname, join
|
6
|
+
from pathlib import Path
|
7
|
+
from typing import Reversible, Union
|
8
|
+
|
9
|
+
def handle_negation(file_path, rules: Reversible["IgnoreRule"]):
    """Decide whether ``file_path`` is ignored when negation rules exist.

    The rules are scanned from last to first, so the final rule that matches
    the path is the one that decides the outcome.

    Args:
        file_path: Path handed unchanged to each rule's ``match`` method.
        rules: Reversible collection of rules, in the order they were parsed.

    Returns:
        ``False`` when no rule matches or the deciding rule is a negation,
        ``True`` when the deciding rule is a regular (non-negated) rule.
    """
    deciding_rule = next(
        (candidate for candidate in reversed(rules)
         if candidate.match(file_path)),
        None,
    )
    if deciding_rule is None:
        return False
    return not deciding_rule.negation
|
14
|
+
|
15
|
+
def parse_gitignore(full_path, base_dir=None):
    """Build an ignore-matcher from the gitignore file at ``full_path``.

    Args:
        full_path: Location of the ignore file to read.
        base_dir: Directory the patterns are relative to. When ``None``, the
            directory containing ``full_path`` is used.

    Returns:
        The callable produced by ``_parse_gitignore_lines``; it takes a file
        path and reports whether the parsed rules ignore it.
    """
    anchor_dir = dirname(full_path) if base_dir is None else base_dir
    with open(full_path) as handle:
        matcher = _parse_gitignore_lines(handle, full_path, anchor_dir)
    return matcher
|
20
|
+
|
21
|
+
def parse_gitignore_str(gitignore_str, base_dir):
    """Build an ignore-matcher from gitignore content held in a string.

    Args:
        gitignore_str: Full text of a gitignore file.
        base_dir: Directory the patterns are relative to.

    Returns:
        The callable produced by ``_parse_gitignore_lines``.
    """
    # There is no real file; rules are reported as coming from a
    # ".gitignore" located in base_dir.
    synthetic_source = join(base_dir, '.gitignore')
    return _parse_gitignore_lines(
        gitignore_str.splitlines(), synthetic_source, base_dir)
|
25
|
+
|
26
|
+
def _parse_gitignore_lines(lines, full_path, base_dir):
    """Turn an iterable of gitignore lines into a matcher callable.

    Args:
        lines: Iterable of pattern lines; a trailing newline on each line is
            removed before parsing.
        full_path: Path recorded as the source of every rule.
        base_dir: Directory the patterns are relative to; it is normalised
            before being attached to the rules.

    Returns:
        A one-argument callable that takes a file path and returns whether
        the parsed rules ignore it.
    """
    root = _normalize_path(base_dir)
    parsed = (
        rule_from_pattern(
            raw.rstrip('\n'), base_path=root, source=(full_path, number))
        for number, raw in enumerate(lines, start=1)
    )
    # Comments and blank lines parse to None and are dropped here.
    rules = [rule for rule in parsed if rule]

    if any(rule.negation for rule in rules):
        # We have negation rules. We can't use a simple "any" to evaluate
        # them: later rules override earlier rules.
        return lambda file_path: handle_negation(file_path, rules)
    return lambda file_path: any(rule.match(file_path) for rule in rules)
|
40
|
+
|
41
|
+
def rule_from_pattern(pattern, base_path=None, source=None):
    """
    Take a .gitignore match pattern, such as "*.py[cod]" or "**/*.bak",
    and return an IgnoreRule suitable for matching against files and
    directories. Patterns which do not match files, such as comments
    and blank lines, will return None.

    Because git allows for nested .gitignore files, a base_path value
    is required for correct behavior. The base path should be absolute.

    Args:
        pattern: One line of a gitignore file, without its line terminator.
        base_path: Directory the pattern is relative to; a falsy value is
            stored on the rule as None.
        source: Opaque value stored on the rule for reporting (the callers
            in this module pass a ``(file, line_number)`` tuple).

    Returns:
        An IgnoreRule, or None when the line cannot match anything (blank
        line, comment, a lone "!", or a pattern made only of slashes).
    """
    # Store the exact pattern for our repr and string functions
    orig_pattern = pattern
    # Early returns follow
    # Discard comments and separators
    if pattern.strip() == '' or pattern[0] == '#':
        return None
    # Strip leading bang before examining double asterisks
    negation = pattern[0] == '!'
    if negation:
        pattern = pattern[1:]
        if not pattern:
            # A lone "!" has nothing to negate; the indexing below would
            # otherwise raise IndexError on the empty string.
            return None
    # Multi-asterisks not surrounded by slashes (or at the start/end) should
    # be treated like single-asterisks.
    pattern = re.sub(r'([^/])\*{2,}', r'\1*', pattern)
    pattern = re.sub(r'\*{2,}([^/])', r'*\1', pattern)

    # Special-casing '/', which doesn't match any files or directories
    if pattern.rstrip() == '/':
        return None

    directory_only = pattern[-1] == '/'
    # A slash is a sign that we're tied to the base_path of our rule
    # set.
    anchored = '/' in pattern[:-1]
    if pattern[0] == '/':
        pattern = pattern[1:]
    leading_globstar = pattern.startswith('**')
    if leading_globstar:
        pattern = pattern[2:]
        anchored = False
    if pattern.startswith('/'):
        pattern = pattern[1:]
    if pattern.endswith('/'):
        pattern = pattern[:-1]
    if not pattern:
        if not leading_globstar:
            # Only slashes were left (e.g. "//"); like "/", this matches
            # nothing.
            return None
        # The whole pattern was a globstar form ("**", "/**", "**/",
        # "/**/"): nothing but the wildcard remains, so match any name.
        pattern = '*'
    # patterns with leading hashes or exclamation marks are escaped with a
    # backslash in front, unescape it
    if pattern[:2] in ('\\#', '\\!'):
        pattern = pattern[1:]
    # trailing spaces are ignored unless they are escaped with a backslash
    # NOTE(review): with `i > 1` a single trailing space after a
    # one-character pattern (e.g. "a ") is never stripped -- confirm whether
    # `i > 0` was intended.
    i = len(pattern) - 1
    striptrailingspaces = True
    while i > 1 and pattern[i] == ' ':
        if pattern[i - 1] == '\\':
            # Drop the escaping backslash and keep the space itself.
            pattern = pattern[:i - 1] + pattern[i:]
            i = i - 1
            striptrailingspaces = False
        else:
            if striptrailingspaces:
                pattern = pattern[:i]
        i = i - 1
    regex = fnmatch_pathname_to_regex(
        pattern, directory_only, negation, anchored=bool(anchored)
    )
    return IgnoreRule(
        pattern=orig_pattern,
        regex=regex,
        negation=negation,
        directory_only=directory_only,
        anchored=anchored,
        base_path=base_path if base_path else None,
        source=source
    )
|
112
|
+
|
113
|
+
|
114
|
+
# Field names of an IgnoreRule, in positional order.
IGNORE_RULE_FIELDS = [
    'pattern', 'regex',  # Basic values
    'negation', 'directory_only', 'anchored',  # Behavior flags
    'base_path',  # Meaningful for gitignore-style behavior
    'source'  # (file, line) tuple for reporting
]


class IgnoreRule(collections.namedtuple('IgnoreRule_', IGNORE_RULE_FIELDS)):
    """A single parsed gitignore pattern and the regex compiled from it.

    The fields are those listed in ``IGNORE_RULE_FIELDS``.
    """

    def __str__(self):
        """Return the pattern text stored on the rule."""
        return self.pattern

    def __repr__(self):
        """Return ``IgnoreRule('<pattern>')``."""
        return f"IgnoreRule('{self.pattern}')"

    def match(self, abs_path: Union[str, Path]):
        """Report whether ``abs_path`` matches this rule's regex.

        Args:
            abs_path: Path to test. It is normalised and, when the rule has
                a ``base_path``, made relative to it first.

        Returns:
            True if the rule's regex is found in the resulting path string,
            otherwise False.

        Raises:
            ValueError: Propagated from ``Path.relative_to`` when the path
                does not lie under ``base_path``.
        """
        if self.base_path:
            rel_path = str(
                _normalize_path(abs_path).relative_to(self.base_path))
        else:
            rel_path = str(_normalize_path(abs_path))
        # Path() strips the trailing slash, so we need to preserve it
        # in case of directory-only negation. endswith() also copes with an
        # empty string, which indexing [-1] would not.
        if (self.negation and isinstance(abs_path, str)
                and abs_path.endswith('/')):
            rel_path += '/'
        if rel_path.startswith('./'):
            rel_path = rel_path[2:]
        return bool(re.search(self.regex, rel_path))
|
144
|
+
|
145
|
+
|
146
|
+
# Frustratingly, python's fnmatch doesn't provide the FNM_PATHNAME
|
147
|
+
# option that .gitignore's behavior depends on.
|
148
|
+
def fnmatch_pathname_to_regex(
    pattern, directory_only: bool, negation: bool, anchored: bool = False
):
    """
    Implements fnmatch style-behavior, as though with FNM_PATHNAME flagged;
    the path separator will not match shell-style '*' and '?' wildcards.

    Args:
        pattern: Glob pattern with the leading/trailing slash and negation
            marker already removed.
        directory_only: Whether the original pattern ended with a slash.
        negation: Whether the original pattern started with "!".
        anchored: When true the regex is tied to the start of the path;
            otherwise it may start after any separator.

    Returns:
        The regular expression source as a string.
    """
    i, n = 0, len(pattern)

    seps = [re.escape(os.sep)]
    if os.altsep is not None:
        seps.append(re.escape(os.altsep))
    # Every character inside a character class is literal, so the separators
    # are concatenated directly; joining them with '|' would make a literal
    # '|' count as a separator whenever os.altsep exists.
    sep_chars = ''.join(seps)
    seps_group = '[' + sep_chars + ']'
    nonsep = '[^' + sep_chars + ']'

    res = []
    while i < n:
        c = pattern[i]
        i += 1
        if c == '*':
            if i < n and pattern[i] == '*':
                i += 1
                if i < n and pattern[i] == '/':
                    # "**/" spans zero or more whole directories.
                    i += 1
                    res.append('(.*' + seps_group + ')?')
                else:
                    res.append('.*')
            else:
                # A single "*" never crosses a separator.
                res.append(nonsep + '*')
        elif c == '?':
            res.append(nonsep)
        elif c == '/':
            res.append(seps_group)
        elif c == '[':
            # Locate the closing bracket; a "]" directly after "[" or "[!"
            # is part of the set rather than its end.
            j = i
            if j < n and pattern[j] == '!':
                j += 1
            if j < n and pattern[j] == ']':
                j += 1
            while j < n and pattern[j] != ']':
                j += 1
            if j >= n:
                # No closing bracket: treat "[" as a literal character.
                res.append('\\[')
            else:
                # Slashes are dropped so the set cannot match a separator.
                stuff = pattern[i:j].replace('\\', '\\\\').replace('/', '')
                i = j + 1
                if not stuff:
                    # The set held only slashes, so it can never match.
                    res.append('(?!)')
                elif stuff == '!':
                    # Negation of slashes only: any non-separator character.
                    res.append(nonsep)
                else:
                    if stuff[0] == '!':
                        stuff = '^' + stuff[1:]
                    elif stuff[0] == '^':
                        # A leading "^" is literal in a glob; escape it.
                        stuff = '\\' + stuff
                    res.append('[{}]'.format(stuff))
        else:
            res.append(re.escape(c))
    if anchored:
        res.insert(0, '^')
    else:
        res.insert(0, f"(^|{seps_group})")
    if not directory_only:
        res.append('($|\\/.*$)')
    elif negation:
        # Directory-only negation requires the explicit trailing slash.
        res.append('/$')
    else:
        res.append('($|\\/)')
    return ''.join(res)
|
215
|
+
|
216
|
+
|
217
|
+
def _normalize_path(path: Union[str, Path]) -> Path:
|
218
|
+
"""Normalize a path without resolving symlinks.
|
219
|
+
|
220
|
+
This is equivalent to `Path.resolve()` except that it does not resolve symlinks.
|
221
|
+
Note that this simplifies paths by removing double slashes, `..`, `.` etc. like
|
222
|
+
`Path.resolve()` does.
|
223
|
+
"""
|
224
|
+
return Path(abspath(path))
|
file_query_text/main.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: file_query_text
|
3
|
-
Version: 0.1.6
|
3
|
+
Version: 0.1.7
|
4
4
|
Summary: SQL-like interface for querying files in your filesystem
|
5
5
|
Author-email: nik <42a11b@nikdav.is>
|
6
6
|
License-Expression: MIT
|
@@ -11,7 +11,6 @@ Classifier: Operating System :: OS Independent
|
|
11
11
|
Requires-Python: >=3.12
|
12
12
|
Description-Content-Type: text/markdown
|
13
13
|
Requires-Dist: pyparsing>=3.2.3
|
14
|
-
Requires-Dist: gitignore-parser>=0.1.12
|
15
14
|
Provides-Extra: dev
|
16
15
|
Requires-Dist: pytest>=8.3.5; extra == "dev"
|
17
16
|
|
@@ -0,0 +1,10 @@
|
|
1
|
+
file_query_text/__init__.py,sha256=0koRSyAOr7mcK_M2meJyNhIJ9Bu-dOUa-wR1SJdXvw4,89
|
2
|
+
file_query_text/cli.py,sha256=eijCT1pHk4wtBhhmFHyeTOoLNz0zwk7Bm4izRLrjZb4,3709
|
3
|
+
file_query_text/gitignore_parser.py,sha256=PZk5MKyW5e8NXCyfC8w3r2JIp1HeF5CpZ5MdXOWlPNM,7734
|
4
|
+
file_query_text/grammar.py,sha256=XanSi9VCKuIQNlOMXRBZbeWxaJj2UhMJPDIYKgw4lEQ,1655
|
5
|
+
file_query_text/main.py,sha256=PQGwULcO0R8MxMcWcyjcXbH6UyZZAgNOML2zACVVZu4,7060
|
6
|
+
file_query_text-0.1.7.dist-info/METADATA,sha256=gx0dg_yfDf1x94hDMIDB4qwLb3Gsi03c_KslaM1GS_A,2440
|
7
|
+
file_query_text-0.1.7.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
8
|
+
file_query_text-0.1.7.dist-info/entry_points.txt,sha256=rNFYWzvcIsUZkGNsc_E_B5HyYRnqqdj_u8_IeQpw1wo,48
|
9
|
+
file_query_text-0.1.7.dist-info/top_level.txt,sha256=o1FzSvLa6kSV61b7RLHWRhEezc96m05YwIKqjuWUSxU,16
|
10
|
+
file_query_text-0.1.7.dist-info/RECORD,,
|
@@ -1,9 +0,0 @@
|
|
1
|
-
file_query_text/__init__.py,sha256=kJ33So_6rzvtBPQOQZaG0-_cw6mbIzuaRqzw8lY2AbI,89
|
2
|
-
file_query_text/cli.py,sha256=eijCT1pHk4wtBhhmFHyeTOoLNz0zwk7Bm4izRLrjZb4,3709
|
3
|
-
file_query_text/grammar.py,sha256=XanSi9VCKuIQNlOMXRBZbeWxaJj2UhMJPDIYKgw4lEQ,1655
|
4
|
-
file_query_text/main.py,sha256=YUdNijSm_iIcb2rkdKtDsE0Jl2tVbGi0J3G-bSO5_J0,7024
|
5
|
-
file_query_text-0.1.6.dist-info/METADATA,sha256=CUk07ZjACLMQCO_EdQ076ChtqtifqdITEZFtIOYM_sY,2480
|
6
|
-
file_query_text-0.1.6.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
7
|
-
file_query_text-0.1.6.dist-info/entry_points.txt,sha256=rNFYWzvcIsUZkGNsc_E_B5HyYRnqqdj_u8_IeQpw1wo,48
|
8
|
-
file_query_text-0.1.6.dist-info/top_level.txt,sha256=o1FzSvLa6kSV61b7RLHWRhEezc96m05YwIKqjuWUSxU,16
|
9
|
-
file_query_text-0.1.6.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|