frontmatter-utils 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fmu/__init__.py +25 -0
- fmu/__main__.py +9 -0
- fmu/cli.py +407 -0
- fmu/core.py +105 -0
- fmu/search.py +174 -0
- fmu/update.py +349 -0
- fmu/validation.py +269 -0
- frontmatter_utils-0.4.0.dist-info/METADATA +852 -0
- frontmatter_utils-0.4.0.dist-info/RECORD +19 -0
- frontmatter_utils-0.4.0.dist-info/WHEEL +5 -0
- frontmatter_utils-0.4.0.dist-info/entry_points.txt +2 -0
- frontmatter_utils-0.4.0.dist-info/licenses/LICENSE +21 -0
- frontmatter_utils-0.4.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +1 -0
- tests/test_cli.py +258 -0
- tests/test_core.py +120 -0
- tests/test_search.py +239 -0
- tests/test_update.py +303 -0
- tests/test_validation.py +357 -0
fmu/__init__.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""
fmu - Front Matter Utils

A Python library and CLI tool for parsing and searching front matter in files.
"""

# Package metadata.
__version__ = "0.4.0"
__author__ = "Gerald Nguyen The Huy"

# Re-export the library entry points at package level.
from .core import parse_frontmatter, extract_content, parse_file
from .search import search_frontmatter
from .validation import validate_frontmatter, validate_and_output
from .update import update_frontmatter, update_and_output

# Names exported by `from fmu import *`.
__all__ = [
    "parse_frontmatter",
    "extract_content",
    "parse_file",
    "search_frontmatter",
    "validate_frontmatter",
    "validate_and_output",
    "update_frontmatter",
    "update_and_output",
    "__version__",
]
|
fmu/__main__.py
ADDED
fmu/cli.py
ADDED
|
@@ -0,0 +1,407 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Command Line Interface for fmu.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import sys
|
|
7
|
+
from typing import List, Dict, Any
|
|
8
|
+
from . import __version__
|
|
9
|
+
from .core import parse_file, get_files_from_patterns
|
|
10
|
+
from .search import search_and_output
|
|
11
|
+
from .validation import validate_and_output
|
|
12
|
+
from .update import update_and_output
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def cmd_version():
    """Print the package version string to stdout."""
    print(__version__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def cmd_help():
    """Print the top-level usage summary (banner, global options, commands) to stdout."""
    # Empty strings produce the blank separator lines between sections.
    help_lines = [
        "fmu - Front Matter Utils",
        f"Version: {__version__}",
        "",
        "Usage: fmu [--format FORMAT] COMMAND [OPTIONS]",
        "",
        "Global Options:",
        " --format FORMAT Format of frontmatter (default: yaml)",
        " May support TOML, JSON, INI in future versions",
        "",
        "Commands:",
        " version Show version number",
        " help Show this help message",
        " read PATTERNS Parse files and extract frontmatter/content",
        " search PATTERNS Search for specific frontmatter fields",
        " validate PATTERNS Validate frontmatter fields against rules",
        " update PATTERNS Update frontmatter fields",
        "",
        "For command-specific help, use: fmu COMMAND --help",
    ]
    print("\n".join(help_lines))
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def cmd_read(patterns: List[str], output: str = "both", skip_heading: bool = False, format_type: str = "yaml"):
    """
    Handle read command.

    Resolves ``patterns`` to a list of files, parses each one and prints the
    requested parts to stdout. A file that cannot be read or parsed is
    reported on stderr and skipped; the remaining files are still processed.

    Args:
        patterns: List of glob patterns or file paths
        output: What to output ('frontmatter', 'content', 'both')
        skip_heading: Whether to skip section headings
        format_type: Format of frontmatter
    """
    # Imported once per call instead of once per file with front matter.
    import yaml

    files = get_files_from_patterns(patterns)
    show_frontmatter = output in ('frontmatter', 'both')
    show_content = output in ('content', 'both')

    for file_path in files:
        try:
            frontmatter, content = parse_file(file_path, format_type)

            # Only label the output when more than one file is being shown.
            if len(files) > 1:
                print(f"\n=== {file_path} ===")

            if show_frontmatter:
                if not skip_heading:
                    print("Front matter:")
                if frontmatter:
                    print(yaml.dump(frontmatter, default_flow_style=False).rstrip())
                else:
                    print("None")

            if show_content:
                if output == 'both' and not skip_heading:
                    print("\nContent:")
                print(content.rstrip())

        except (OSError, ValueError) as e:
            # OSError covers FileNotFoundError as well as IsADirectoryError and
            # PermissionError raised by open(); ValueError covers parse errors
            # and UnicodeDecodeError. None of them should abort the whole batch.
            print(f"Error processing {file_path}: {e}", file=sys.stderr)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def cmd_search(
    patterns: List[str],
    name: str,
    value: str = None,
    ignore_case: bool = False,
    regex: bool = False,
    csv_file: str = None,
    format_type: str = "yaml"
):
    """
    Handle search command by delegating to ``search_and_output``.

    Args:
        patterns: Glob patterns or file paths to search
        name: Front matter field name to look for
        value: Value to match against the field, if any
        ignore_case: Match case-insensitively
        regex: Treat ``value`` as a regular expression
        csv_file: CSV output file, if any
        format_type: Format of frontmatter
    """
    # All arguments are forwarded positionally, in the order the callee expects.
    search_and_output(
        patterns,
        name,
        value,
        ignore_case,
        regex,
        csv_file,
        format_type,
    )
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def cmd_validate(
    patterns: List[str],
    validations: List[Dict[str, Any]],
    ignore_case: bool = False,
    csv_file: str = None,
    format_type: str = "yaml"
):
    """
    Handle validate command by delegating to ``validate_and_output``.

    Args:
        patterns: Glob patterns or file paths to validate
        validations: Validation rule dictionaries to apply
        ignore_case: Match case-insensitively
        csv_file: CSV output file, if any
        format_type: Format of frontmatter
    """
    # All arguments are forwarded positionally, in the order the callee expects.
    validate_and_output(
        patterns,
        validations,
        ignore_case,
        csv_file,
        format_type,
    )
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def cmd_update(
    patterns: List[str],
    frontmatter_name: str,
    operations: List[Dict[str, Any]],
    deduplication: bool = True,
    format_type: str = "yaml"
):
    """
    Handle update command by delegating to ``update_and_output``.

    Args:
        patterns: Glob patterns or file paths to update
        frontmatter_name: Front matter field name to update
        operations: Update operation dictionaries to apply
        deduplication: Whether to deduplicate array values
        format_type: Format of frontmatter
    """
    # All arguments are forwarded positionally, in the order the callee expects.
    update_and_output(
        patterns,
        frontmatter_name,
        operations,
        deduplication,
        format_type,
    )
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def create_parser():
    """Build and return the ``fmu`` argument parser with all sub-commands registered."""
    patterns_help = 'Glob patterns or file paths'

    root = argparse.ArgumentParser(
        prog='fmu',
        description='Front Matter Utils - Parse and search frontmatter in files'
    )
    root.add_argument(
        '--format',
        default='yaml',
        help='Format of frontmatter (default: yaml). May support TOML, JSON, INI in future versions'
    )

    commands = root.add_subparsers(dest='command', help='Commands')

    # Commands without arguments.
    commands.add_parser('version', help='Show version number')
    commands.add_parser('help', help='Show help information')

    # read
    reader = commands.add_parser('read', help='Parse files and extract frontmatter/content')
    reader.add_argument('patterns', nargs='+', help=patterns_help)
    reader.add_argument(
        '--output',
        choices=['frontmatter', 'content', 'both'],
        default='both',
        help='What to output (default: both)'
    )
    reader.add_argument('--skip-heading', action='store_true', help='Skip section headings (default: false)')

    # search
    searcher = commands.add_parser('search', help='Search for specific frontmatter fields')
    searcher.add_argument('patterns', nargs='+', help=patterns_help)
    searcher.add_argument('--name', required=True, help='Name of frontmatter field to search for')
    searcher.add_argument('--value', help='Value to match (optional)')
    searcher.add_argument('--ignore-case', action='store_true', help='Case-insensitive matching (default: false)')
    searcher.add_argument('--regex', action='store_true', help='Use regex pattern matching for values (default: false)')
    searcher.add_argument('--csv', dest='csv_file', help='Output to CSV file')

    # validate
    validator = commands.add_parser('validate', help='Validate frontmatter fields against rules')
    validator.add_argument('patterns', nargs='+', help=patterns_help)

    # Single-argument rules; each may be repeated on the command line.
    validator.add_argument('--exist', action='append', help='Require field to exist')
    validator.add_argument('--not', action='append', dest='not_exist', help='Require field to not exist')

    # Two-argument rules: (flag, name of second operand, help text).
    # argparse derives the destination from the flag ('--not-contain' -> 'not_contain').
    pair_rules = (
        ('--eq', 'VALUE', 'Require field equals value'),
        ('--ne', 'VALUE', 'Require field not equals value'),
        ('--contain', 'VALUE', 'Require array field contains value'),
        ('--not-contain', 'VALUE', 'Require array field does not contain value'),
        ('--match', 'REGEX', 'Require field matches regex'),
        ('--not-match', 'REGEX', 'Require field does not match regex'),
    )
    for flag, operand, description in pair_rules:
        validator.add_argument(
            flag,
            action='append',
            nargs=2,
            metavar=('FIELD', operand),
            help=description
        )

    validator.add_argument('--ignore-case', action='store_true', help='Case-insensitive matching (default: false)')
    validator.add_argument('--csv', dest='csv_file', help='Output to CSV file')

    # update
    updater = commands.add_parser('update', help='Update frontmatter fields')
    updater.add_argument('patterns', nargs='+', help=patterns_help)
    updater.add_argument('--name', required=True, help='Name of frontmatter field to update')
    updater.add_argument(
        '--deduplication',
        choices=['true', 'false'],
        default='true',
        help='Eliminate exact duplicates in array values (default: true)'
    )
    updater.add_argument(
        '--case',
        choices=['upper', 'lower', 'Sentence case', 'Title Case', 'snake_case', 'kebab-case'],
        help='Transform the case of the frontmatter value(s)'
    )
    # --replace and --remove may each be given several times.
    updater.add_argument(
        '--replace',
        action='append',
        nargs=2,
        metavar=('FROM', 'TO'),
        help='Replace values matching FROM with TO (can be used multiple times)'
    )
    updater.add_argument(
        '--remove',
        action='append',
        help='Remove values matching the specified pattern (can be used multiple times)'
    )
    # Modifiers shared by the replace and remove operations.
    updater.add_argument(
        '--ignore-case',
        action='store_true',
        help='Ignore case when performing replacements and removals (default: false)'
    )
    updater.add_argument(
        '--regex',
        action='store_true',
        help='Treat patterns as regex for replacements and removals (default: false)'
    )

    return root
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def _parse_update_args(args) -> List[Dict[str, Any]]:
|
|
267
|
+
"""Parse update arguments into update operations."""
|
|
268
|
+
operations = []
|
|
269
|
+
|
|
270
|
+
# Handle --case
|
|
271
|
+
if args.case:
|
|
272
|
+
operations.append({
|
|
273
|
+
'type': 'case',
|
|
274
|
+
'case_type': args.case
|
|
275
|
+
})
|
|
276
|
+
|
|
277
|
+
# Handle --replace operations
|
|
278
|
+
if args.replace:
|
|
279
|
+
for from_val, to_val in args.replace:
|
|
280
|
+
operations.append({
|
|
281
|
+
'type': 'replace',
|
|
282
|
+
'from': from_val,
|
|
283
|
+
'to': to_val,
|
|
284
|
+
'ignore_case': args.ignore_case,
|
|
285
|
+
'regex': args.regex
|
|
286
|
+
})
|
|
287
|
+
|
|
288
|
+
# Handle --remove operations
|
|
289
|
+
if args.remove:
|
|
290
|
+
for remove_val in args.remove:
|
|
291
|
+
operations.append({
|
|
292
|
+
'type': 'remove',
|
|
293
|
+
'value': remove_val,
|
|
294
|
+
'ignore_case': args.ignore_case,
|
|
295
|
+
'regex': args.regex
|
|
296
|
+
})
|
|
297
|
+
|
|
298
|
+
return operations
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def _parse_validation_args(args) -> List[Dict[str, Any]]:
|
|
302
|
+
"""Parse validation arguments into validation rules."""
|
|
303
|
+
validations = []
|
|
304
|
+
|
|
305
|
+
# Handle --exist
|
|
306
|
+
if args.exist:
|
|
307
|
+
for field in args.exist:
|
|
308
|
+
validations.append({'type': 'exist', 'field': field})
|
|
309
|
+
|
|
310
|
+
# Handle --not
|
|
311
|
+
if args.not_exist:
|
|
312
|
+
for field in args.not_exist:
|
|
313
|
+
validations.append({'type': 'not', 'field': field})
|
|
314
|
+
|
|
315
|
+
# Handle --eq
|
|
316
|
+
if args.eq:
|
|
317
|
+
for field, value in args.eq:
|
|
318
|
+
validations.append({'type': 'eq', 'field': field, 'value': value})
|
|
319
|
+
|
|
320
|
+
# Handle --ne
|
|
321
|
+
if args.ne:
|
|
322
|
+
for field, value in args.ne:
|
|
323
|
+
validations.append({'type': 'ne', 'field': field, 'value': value})
|
|
324
|
+
|
|
325
|
+
# Handle --contain
|
|
326
|
+
if args.contain:
|
|
327
|
+
for field, value in args.contain:
|
|
328
|
+
validations.append({'type': 'contain', 'field': field, 'value': value})
|
|
329
|
+
|
|
330
|
+
# Handle --not-contain
|
|
331
|
+
if args.not_contain:
|
|
332
|
+
for field, value in args.not_contain:
|
|
333
|
+
validations.append({'type': 'not-contain', 'field': field, 'value': value})
|
|
334
|
+
|
|
335
|
+
# Handle --match
|
|
336
|
+
if args.match:
|
|
337
|
+
for field, regex in args.match:
|
|
338
|
+
validations.append({'type': 'match', 'field': field, 'regex': regex})
|
|
339
|
+
|
|
340
|
+
# Handle --not-match
|
|
341
|
+
if args.not_match:
|
|
342
|
+
for field, regex in args.not_match:
|
|
343
|
+
validations.append({'type': 'not-match', 'field': field, 'regex': regex})
|
|
344
|
+
|
|
345
|
+
return validations
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
def main():
    """Parse the command line and dispatch to the matching ``cmd_*`` handler."""
    args = create_parser().parse_args()
    command = args.command

    # Running without a command behaves like the explicit 'help' command.
    if command is None or command == 'help':
        cmd_help()
        return

    if command == 'version':
        cmd_version()
        return

    if command == 'read':
        cmd_read(
            patterns=args.patterns,
            output=args.output,
            skip_heading=args.skip_heading,
            format_type=args.format
        )
        return

    if command == 'search':
        cmd_search(
            patterns=args.patterns,
            name=args.name,
            value=args.value,
            ignore_case=args.ignore_case,
            regex=args.regex,
            csv_file=args.csv_file,
            format_type=args.format
        )
        return

    if command == 'validate':
        rules = _parse_validation_args(args)
        # At least one rule option is required.
        if not rules:
            print("Error: No validation rules specified", file=sys.stderr)
            sys.exit(1)
        cmd_validate(
            patterns=args.patterns,
            validations=rules,
            ignore_case=args.ignore_case,
            csv_file=args.csv_file,
            format_type=args.format
        )
        return

    if command == 'update':
        ops = _parse_update_args(args)
        # At least one of --case / --replace / --remove is required.
        if not ops:
            print("Error: No update operations specified", file=sys.stderr)
            sys.exit(1)
        cmd_update(
            patterns=args.patterns,
            frontmatter_name=args.name,
            operations=ops,
            # The flag arrives as the string 'true' or 'false'.
            deduplication=(args.deduplication == 'true'),
            format_type=args.format
        )
        return

    # Fallback for a command name that has no handler above.
    print(f"Unknown command: {command}", file=sys.stderr)
    sys.exit(1)


if __name__ == '__main__':
    main()
|
fmu/core.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Core frontmatter parsing functionality for fmu.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
import yaml
|
|
7
|
+
from typing import Dict, Any, Tuple, Optional
|
|
8
|
+
import glob
|
|
9
|
+
import os
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def parse_frontmatter(content: str, format_type: str = "yaml") -> Tuple[Optional[Dict[str, Any]], str]:
    """
    Parse frontmatter from content string.

    Front matter is a block at the very start of ``content`` that opens with a
    ``---`` line and closes with another ``---`` line. The closing delimiter
    may be followed by the body, or may be the last thing in the input with no
    trailing newline.

    Args:
        content: The file content as a string
        format_type: The format of the frontmatter (currently only 'yaml' supported)

    Returns:
        Tuple of (frontmatter, remaining_content). ``frontmatter`` is whatever
        ``yaml.safe_load`` returns for the delimited block. If no front matter
        block is found, the result is ``(None, content)`` with ``content``
        unchanged.

    Raises:
        ValueError: If ``format_type`` is not 'yaml', or the block is not valid YAML.
    """
    if format_type.lower() != "yaml":
        raise ValueError(f"Format '{format_type}' not supported. Currently only 'yaml' is supported.")

    # Usual layout: opening '---' line, YAML, closing '---' line, then the body.
    match = re.match(r'^---\s*\n(.*?)\n---\s*\n(.*)$', content, re.DOTALL)
    if match:
        frontmatter_content = match.group(1)
        remaining_content = match.group(2)
    else:
        # Fallback: the closing '---' ends the input without a newline after
        # it. Tried only after the usual pattern fails, so inputs that already
        # matched are parsed exactly as before.
        match = re.match(r'^---\s*\n(.*?)\n---[ \t]*\Z', content, re.DOTALL)
        if not match:
            # No frontmatter found
            return None, content
        frontmatter_content = match.group(1)
        remaining_content = ""

    try:
        frontmatter = yaml.safe_load(frontmatter_content)
    except yaml.YAMLError as e:
        # Chain the YAML error so the original parser diagnostics stay reachable.
        raise ValueError(f"Invalid YAML frontmatter: {e}") from e
    return frontmatter, remaining_content
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def extract_content(content: str, format_type: str = "yaml") -> str:
    """
    Return the body of ``content`` with any leading frontmatter removed.

    Args:
        content: The file content as a string
        format_type: The format of the frontmatter

    Returns:
        The second element of ``parse_frontmatter``'s result
    """
    # parse_frontmatter returns (frontmatter, body); only the body is needed.
    return parse_frontmatter(content, format_type)[1]
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def parse_file(file_path: str, format_type: str = "yaml") -> Tuple[Optional[Dict[str, Any]], str]:
    """
    Parse frontmatter from a file.

    The file is read as UTF-8 text and handed to ``parse_frontmatter``.

    Args:
        file_path: Path to the file to parse
        format_type: The format of the frontmatter

    Returns:
        Tuple of (frontmatter_dict, content)

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the file is not valid UTF-8.
    """
    # Keep the try body to the file read so the handlers below only ever
    # translate I/O and decoding failures.
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except FileNotFoundError as exc:
        # Re-raise with a uniform message, keeping the OS error as the cause.
        raise FileNotFoundError(f"File not found: {file_path}") from exc
    except UnicodeDecodeError as exc:
        raise ValueError(f"Unable to decode file as UTF-8: {file_path}") from exc

    return parse_frontmatter(content, format_type)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def get_files_from_patterns(patterns: list) -> list:
    """
    Get list of files from glob patterns.

    Each entry is handled according to what it names on disk:

    * an existing file is taken as is;
    * an existing directory contributes every file found beneath it;
    * anything else is expanded as a glob pattern (``**`` is recursive), and
      only matches that are regular files are kept.

    Args:
        patterns: List of glob patterns or file paths

    Returns:
        Sorted list of file paths with duplicates removed
    """
    files = set()
    for pattern in patterns:
        if os.path.isfile(pattern):
            files.add(pattern)
        elif os.path.isdir(pattern):
            # If it's a directory, add all files in it
            for root, _, filenames in os.walk(pattern):
                files.update(os.path.join(root, filename) for filename in filenames)
        else:
            # glob also returns matching directories; passing those on would
            # make the later open() call fail, so keep regular files only.
            files.update(
                matched
                for matched in glob.glob(pattern, recursive=True)
                if os.path.isfile(matched)
            )

    return sorted(files)
|