frontmatter-utils 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fmu/__init__.py ADDED
@@ -0,0 +1,25 @@
1
+ """
2
+ fmu - Front Matter Utils
3
+
4
+ A Python library and CLI tool for parsing and searching front matter in files.
5
+ """
6
+
7
+ __version__ = "0.4.0"
8
+ __author__ = "Gerald Nguyen The Huy"
9
+
10
+ from .core import parse_frontmatter, extract_content, parse_file
11
+ from .search import search_frontmatter
12
+ from .validation import validate_frontmatter, validate_and_output
13
+ from .update import update_frontmatter, update_and_output
14
+
15
+ __all__ = [
16
+ "parse_frontmatter",
17
+ "extract_content",
18
+ "parse_file",
19
+ "search_frontmatter",
20
+ "validate_frontmatter",
21
+ "validate_and_output",
22
+ "update_frontmatter",
23
+ "update_and_output",
24
+ "__version__"
25
+ ]
fmu/__main__.py ADDED
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Main entry point for fmu when run as a module.
4
+ """
5
+
6
+ from .cli import main
7
+
8
+ if __name__ == '__main__':
9
+ main()
fmu/cli.py ADDED
@@ -0,0 +1,407 @@
1
+ """
2
+ Command Line Interface for fmu.
3
+ """
4
+
5
+ import argparse
6
+ import sys
7
+ from typing import List, Dict, Any
8
+ from . import __version__
9
+ from .core import parse_file, get_files_from_patterns
10
+ from .search import search_and_output
11
+ from .validation import validate_and_output
12
+ from .update import update_and_output
13
+
14
+
15
def cmd_version():
    """Print the package version string on its own line."""
    version_string = __version__
    print(version_string)
18
+
19
+
20
def cmd_help():
    """Print the top-level usage summary: banner, global options and commands."""
    # Empty strings stand for the blank separator lines between sections.
    help_lines = [
        "fmu - Front Matter Utils",
        f"Version: {__version__}",
        "",
        "Usage: fmu [--format FORMAT] COMMAND [OPTIONS]",
        "",
        "Global Options:",
        " --format FORMAT Format of frontmatter (default: yaml)",
        " May support TOML, JSON, INI in future versions",
        "",
        "Commands:",
        " version Show version number",
        " help Show this help message",
        " read PATTERNS Parse files and extract frontmatter/content",
        " search PATTERNS Search for specific frontmatter fields",
        " validate PATTERNS Validate frontmatter fields against rules",
        " update PATTERNS Update frontmatter fields",
        "",
        "For command-specific help, use: fmu COMMAND --help",
    ]
    print("\n".join(help_lines))
40
+
41
+
42
def cmd_read(patterns: List[str], output: str = "both", skip_heading: bool = False, format_type: str = "yaml"):
    """
    Handle read command.

    Resolves ``patterns`` to files, parses each one and prints the requested
    parts to standard output. A failure on one file is reported on standard
    error and processing continues with the next file.

    Args:
        patterns: List of glob patterns or file paths
        output: What to output ('frontmatter', 'content', 'both')
        skip_heading: Whether to skip the "Front matter:" / "Content:" headings
        format_type: Format of frontmatter
    """
    # Imported once per call instead of once per file inside the loop.
    import yaml

    files = get_files_from_patterns(patterns)
    show_file_banner = len(files) > 1
    show_frontmatter = output in ('frontmatter', 'both')
    show_content = output in ('content', 'both')

    for file_path in files:
        try:
            frontmatter, content = parse_file(file_path, format_type)

            if show_file_banner:
                print(f"\n=== {file_path} ===")

            if show_frontmatter:
                if not skip_heading:
                    print("Front matter:")
                if frontmatter:
                    print(yaml.dump(frontmatter, default_flow_style=False).rstrip())
                else:
                    print("None")

            if show_content:
                # The "Content:" heading only separates the two sections,
                # so it is printed only when both are shown.
                if output == 'both' and not skip_heading:
                    print("\nContent:")
                print(content.rstrip())

        except (OSError, ValueError) as e:
            # OSError covers FileNotFoundError as well as permission errors
            # and directories; ValueError covers UnicodeDecodeError. Catching
            # the base classes keeps one bad path from aborting the whole run.
            print(f"Error processing {file_path}: {e}", file=sys.stderr)
77
+
78
+
79
def cmd_search(
    patterns: List[str],
    name: str,
    value: str = None,
    ignore_case: bool = False,
    regex: bool = False,
    csv_file: str = None,
    format_type: str = "yaml"
):
    """
    Handle search command by forwarding every argument to search_and_output.

    Args:
        patterns: Glob patterns or file paths to search in
        name: Frontmatter field name to look for
        value: Value to match, or None to match on the name alone
        ignore_case: Whether matching is case-insensitive
        regex: Whether ``value`` is treated as a regex pattern
        csv_file: CSV file to write results to, or None
        format_type: Format of frontmatter
    """
    search_and_output(
        patterns,
        name,
        value,
        ignore_case,
        regex,
        csv_file,
        format_type,
    )
101
+
102
+
103
def cmd_validate(
    patterns: List[str],
    validations: List[Dict[str, Any]],
    ignore_case: bool = False,
    csv_file: str = None,
    format_type: str = "yaml"
):
    """
    Handle validate command by forwarding every argument to validate_and_output.

    Args:
        patterns: Glob patterns or file paths to validate
        validations: Validation rules to apply
        ignore_case: Whether matching is case-insensitive
        csv_file: CSV file to write results to, or None
        format_type: Format of frontmatter
    """
    validate_and_output(
        patterns,
        validations,
        ignore_case,
        csv_file,
        format_type,
    )
121
+
122
+
123
def cmd_update(
    patterns: List[str],
    frontmatter_name: str,
    operations: List[Dict[str, Any]],
    deduplication: bool = True,
    format_type: str = "yaml"
):
    """
    Handle update command by forwarding every argument to update_and_output.

    Args:
        patterns: Glob patterns or file paths to update
        frontmatter_name: Frontmatter field name to update
        operations: Update operations to apply
        deduplication: Whether array values are deduplicated
        format_type: Format of frontmatter
    """
    update_and_output(
        patterns,
        frontmatter_name,
        operations,
        deduplication,
        format_type,
    )
141
+
142
+
143
def create_parser():
    """Build the argparse parser for fmu: one global option plus six subcommands."""
    root = argparse.ArgumentParser(
        description='Front Matter Utils - Parse and search frontmatter in files',
        prog='fmu',
    )
    root.add_argument(
        '--format',
        help='Format of frontmatter (default: yaml). May support TOML, JSON, INI in future versions',
        default='yaml',
    )

    commands = root.add_subparsers(dest='command', help='Commands')

    def add_patterns(command_parser):
        # Every file-processing command takes the same positional argument.
        command_parser.add_argument('patterns', nargs='+', help='Glob patterns or file paths')

    # --- version / help -------------------------------------------------
    commands.add_parser('version', help='Show version number')
    commands.add_parser('help', help='Show help information')

    # --- read -------------------------------------------------------------
    read_cmd = commands.add_parser('read', help='Parse files and extract frontmatter/content')
    add_patterns(read_cmd)
    read_cmd.add_argument(
        '--output',
        default='both',
        choices=['frontmatter', 'content', 'both'],
        help='What to output (default: both)',
    )
    read_cmd.add_argument(
        '--skip-heading',
        help='Skip section headings (default: false)',
        action='store_true',
    )

    # --- search -----------------------------------------------------------
    search_cmd = commands.add_parser('search', help='Search for specific frontmatter fields')
    add_patterns(search_cmd)
    search_cmd.add_argument('--name', required=True, help='Name of frontmatter field to search for')
    search_cmd.add_argument('--value', help='Value to match (optional)')
    search_cmd.add_argument(
        '--ignore-case',
        help='Case-insensitive matching (default: false)',
        action='store_true',
    )
    search_cmd.add_argument(
        '--regex',
        help='Use regex pattern matching for values (default: false)',
        action='store_true',
    )
    search_cmd.add_argument('--csv', dest='csv_file', help='Output to CSV file')

    # --- validate ---------------------------------------------------------
    validate_cmd = commands.add_parser('validate', help='Validate frontmatter fields against rules')
    add_patterns(validate_cmd)

    # Rule options are repeatable; each occurrence is appended to a list.
    validate_cmd.add_argument('--exist', action='append', help='Require field to exist')
    validate_cmd.add_argument('--not', action='append', dest='not_exist', help='Require field to not exist')

    # (flag, explicit dest or None, second metavar, help text), in help order.
    two_value_rules = (
        ('--eq', None, 'VALUE', 'Require field equals value'),
        ('--ne', None, 'VALUE', 'Require field not equals value'),
        ('--contain', None, 'VALUE', 'Require array field contains value'),
        ('--not-contain', 'not_contain', 'VALUE', 'Require array field does not contain value'),
        ('--match', None, 'REGEX', 'Require field matches regex'),
        ('--not-match', 'not_match', 'REGEX', 'Require field does not match regex'),
    )
    for flag, explicit_dest, operand_name, help_text in two_value_rules:
        extra = {} if explicit_dest is None else {'dest': explicit_dest}
        validate_cmd.add_argument(
            flag,
            action='append',
            nargs=2,
            metavar=('FIELD', operand_name),
            help=help_text,
            **extra,
        )

    validate_cmd.add_argument(
        '--ignore-case',
        help='Case-insensitive matching (default: false)',
        action='store_true',
    )
    validate_cmd.add_argument('--csv', dest='csv_file', help='Output to CSV file')

    # --- update -----------------------------------------------------------
    update_cmd = commands.add_parser('update', help='Update frontmatter fields')
    add_patterns(update_cmd)
    update_cmd.add_argument('--name', required=True, help='Name of frontmatter field to update')
    update_cmd.add_argument(
        '--deduplication',
        default='true',
        choices=['true', 'false'],
        help='Eliminate exact duplicates in array values (default: true)',
    )
    update_cmd.add_argument(
        '--case',
        choices=['upper', 'lower', 'Sentence case', 'Title Case', 'snake_case', 'kebab-case'],
        help='Transform the case of the frontmatter value(s)',
    )
    # --replace and --remove are repeatable.
    update_cmd.add_argument(
        '--replace',
        nargs=2,
        action='append',
        metavar=('FROM', 'TO'),
        help='Replace values matching FROM with TO (can be used multiple times)',
    )
    update_cmd.add_argument(
        '--remove',
        action='append',
        help='Remove values matching the specified pattern (can be used multiple times)',
    )
    # These two flags are shared by the replace and remove operations.
    update_cmd.add_argument(
        '--ignore-case',
        help='Ignore case when performing replacements and removals (default: false)',
        action='store_true',
    )
    update_cmd.add_argument(
        '--regex',
        help='Treat patterns as regex for replacements and removals (default: false)',
        action='store_true',
    )

    return root
264
+
265
+
266
+ def _parse_update_args(args) -> List[Dict[str, Any]]:
267
+ """Parse update arguments into update operations."""
268
+ operations = []
269
+
270
+ # Handle --case
271
+ if args.case:
272
+ operations.append({
273
+ 'type': 'case',
274
+ 'case_type': args.case
275
+ })
276
+
277
+ # Handle --replace operations
278
+ if args.replace:
279
+ for from_val, to_val in args.replace:
280
+ operations.append({
281
+ 'type': 'replace',
282
+ 'from': from_val,
283
+ 'to': to_val,
284
+ 'ignore_case': args.ignore_case,
285
+ 'regex': args.regex
286
+ })
287
+
288
+ # Handle --remove operations
289
+ if args.remove:
290
+ for remove_val in args.remove:
291
+ operations.append({
292
+ 'type': 'remove',
293
+ 'value': remove_val,
294
+ 'ignore_case': args.ignore_case,
295
+ 'regex': args.regex
296
+ })
297
+
298
+ return operations
299
+
300
+
301
+ def _parse_validation_args(args) -> List[Dict[str, Any]]:
302
+ """Parse validation arguments into validation rules."""
303
+ validations = []
304
+
305
+ # Handle --exist
306
+ if args.exist:
307
+ for field in args.exist:
308
+ validations.append({'type': 'exist', 'field': field})
309
+
310
+ # Handle --not
311
+ if args.not_exist:
312
+ for field in args.not_exist:
313
+ validations.append({'type': 'not', 'field': field})
314
+
315
+ # Handle --eq
316
+ if args.eq:
317
+ for field, value in args.eq:
318
+ validations.append({'type': 'eq', 'field': field, 'value': value})
319
+
320
+ # Handle --ne
321
+ if args.ne:
322
+ for field, value in args.ne:
323
+ validations.append({'type': 'ne', 'field': field, 'value': value})
324
+
325
+ # Handle --contain
326
+ if args.contain:
327
+ for field, value in args.contain:
328
+ validations.append({'type': 'contain', 'field': field, 'value': value})
329
+
330
+ # Handle --not-contain
331
+ if args.not_contain:
332
+ for field, value in args.not_contain:
333
+ validations.append({'type': 'not-contain', 'field': field, 'value': value})
334
+
335
+ # Handle --match
336
+ if args.match:
337
+ for field, regex in args.match:
338
+ validations.append({'type': 'match', 'field': field, 'regex': regex})
339
+
340
+ # Handle --not-match
341
+ if args.not_match:
342
+ for field, regex in args.not_match:
343
+ validations.append({'type': 'not-match', 'field': field, 'regex': regex})
344
+
345
+ return validations
346
+
347
+
348
def main():
    """
    Main CLI entry point.

    Parses the command line and dispatches to the matching ``cmd_*`` handler.
    With no command the general help is shown. ``validate`` and ``update``
    exit with status 1 when no rules or operations were supplied.
    """
    args = create_parser().parse_args()
    command = args.command

    if command == 'version':
        cmd_version()
        return

    # An explicit "help" command and a missing command both show the help.
    if command == 'help' or command is None:
        cmd_help()
        return

    if command == 'read':
        cmd_read(
            patterns=args.patterns,
            output=args.output,
            skip_heading=args.skip_heading,
            format_type=args.format,
        )
        return

    if command == 'search':
        cmd_search(
            patterns=args.patterns,
            name=args.name,
            value=args.value,
            ignore_case=args.ignore_case,
            regex=args.regex,
            csv_file=args.csv_file,
            format_type=args.format,
        )
        return

    if command == 'validate':
        rules = _parse_validation_args(args)
        if not rules:
            print("Error: No validation rules specified", file=sys.stderr)
            sys.exit(1)
        cmd_validate(
            patterns=args.patterns,
            validations=rules,
            ignore_case=args.ignore_case,
            csv_file=args.csv_file,
            format_type=args.format,
        )
        return

    if command == 'update':
        update_ops = _parse_update_args(args)
        if not update_ops:
            print("Error: No update operations specified", file=sys.stderr)
            sys.exit(1)
        cmd_update(
            patterns=args.patterns,
            frontmatter_name=args.name,
            operations=update_ops,
            # The option is parsed as the string 'true' or 'false'.
            deduplication=(args.deduplication == 'true'),
            format_type=args.format,
        )
        return

    # Fallback for a command name that has no handler above.
    print(f"Unknown command: {command}", file=sys.stderr)
    sys.exit(1)
404
+
405
+
406
+ if __name__ == '__main__':
407
+ main()
fmu/core.py ADDED
@@ -0,0 +1,105 @@
1
+ """
2
+ Core frontmatter parsing functionality for fmu.
3
+ """
4
+
5
+ import re
6
+ import yaml
7
+ from typing import Dict, Any, Tuple, Optional
8
+ import glob
9
+ import os
10
+
11
+
12
def parse_frontmatter(content: str, format_type: str = "yaml") -> Tuple[Optional[Dict[str, Any]], str]:
    """
    Parse frontmatter from content string.

    Frontmatter is a block at the very start of ``content`` delimited by a
    ``---`` line above and below. The closing ``---`` may be followed by more
    text or may be the last thing in the string (with or without a trailing
    newline).

    Args:
        content: The file content as a string
        format_type: The format of the frontmatter (currently only 'yaml'
            supported; compared case-insensitively)

    Returns:
        Tuple of (frontmatter, remaining_content). When no frontmatter block
        is found the result is ``(None, content)`` with ``content`` unchanged.
        Otherwise ``frontmatter`` is whatever ``yaml.safe_load`` produced for
        the block and ``remaining_content`` is the text after the closing
        delimiter line ("" when nothing follows it).
        NOTE(review): the loaded value is not checked to be a mapping --
        confirm callers cope with other YAML values.

    Raises:
        ValueError: If ``format_type`` is not 'yaml', or the block is not
            valid YAML.
    """
    if format_type.lower() != "yaml":
        raise ValueError(f"Format '{format_type}' not supported. Currently only 'yaml' is supported.")

    # Group 1: text between the delimiters. Group 2: everything after the
    # newline that ends the closing delimiter. That newline and group 2 are
    # optional, so a document ending right at the closing "---" is still
    # recognized. The "\s*" after each delimiter also absorbs trailing spaces
    # and blank lines that directly follow it.
    pattern = r'^---\s*\n(.*?)\n---\s*(?:\n(.*))?\Z'
    match = re.match(pattern, content, re.DOTALL)

    if not match:
        # No frontmatter found
        return None, content

    frontmatter_content = match.group(1)
    # Group 2 is None when the closing delimiter is the end of the input.
    remaining_content = match.group(2) or ""

    try:
        frontmatter = yaml.safe_load(frontmatter_content)
    except yaml.YAMLError as e:
        raise ValueError(f"Invalid YAML frontmatter: {e}") from e
    return frontmatter, remaining_content
42
+
43
+
44
def extract_content(content: str, format_type: str = "yaml") -> str:
    """
    Return ``content`` with any leading frontmatter block removed.

    Args:
        content: The file content as a string
        format_type: The format of the frontmatter

    Returns:
        The second element of ``parse_frontmatter(content, format_type)``,
        i.e. the text that is not frontmatter.
    """
    parsed = parse_frontmatter(content, format_type)
    body = parsed[1]
    return body
57
+
58
+
59
def parse_file(file_path: str, format_type: str = "yaml") -> Tuple[Optional[Dict[str, Any]], str]:
    """
    Parse frontmatter from a file.

    The file is read as UTF-8. A leading byte-order mark, if present, is
    dropped so that a frontmatter delimiter on the first line is still found
    at the very start of the text.

    Args:
        file_path: Path to the file to parse
        format_type: The format of the frontmatter

    Returns:
        Tuple of (frontmatter_dict, content) as produced by
        ``parse_frontmatter``

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the file is not valid UTF-8, or for any ValueError
            raised by ``parse_frontmatter``.
    """
    try:
        # "utf-8-sig" decodes plain UTF-8 and strips a leading BOM if present.
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            content = f.read()
        return parse_frontmatter(content, format_type)
    except FileNotFoundError as e:
        raise FileNotFoundError(f"File not found: {file_path}") from e
    except UnicodeDecodeError as e:
        raise ValueError(f"Unable to decode file as UTF-8: {file_path}") from e
78
+
79
+
80
def get_files_from_patterns(patterns: list) -> list:
    """
    Get list of files from glob patterns.

    Each entry of ``patterns`` is resolved as follows:
      * an existing file is taken as-is;
      * an existing directory contributes every file found by walking it
        recursively;
      * anything else is expanded as a glob pattern (``**`` is recursive),
        keeping only the matches that are files.

    Args:
        patterns: List of glob patterns or file paths

    Returns:
        Sorted list of unique file paths
    """
    found = set()
    for pattern in patterns:
        if os.path.isfile(pattern):
            found.add(pattern)
        elif os.path.isdir(pattern):
            # If it's a directory, add all files in it
            for root, _, filenames in os.walk(pattern):
                found.update(os.path.join(root, filename) for filename in filenames)
        else:
            # Treat as glob pattern. A glob such as "docs/*" or "**" also
            # matches directories, which cannot be opened as files, so only
            # regular-file matches are kept.
            found.update(
                path
                for path in glob.glob(pattern, recursive=True)
                if os.path.isfile(path)
            )

    # The set removed duplicates; sorting makes the order deterministic.
    return sorted(found)