obsidianmd-parser 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
"""Obsidian Parser - A Python library for parsing Obsidian Markdown files."""

# Package metadata exposed as module-level dunder attributes.
__version__ = "0.1.0"
__author__ = "paddyd"
__email__ = "patduf1@gmail.com"

# Re-export the note model classes and the Vault class at this module's level.
from obsidian_parser.note import Embed, Note, Section, Tag, WikiLink
from obsidian_parser.vault import Vault

# Names exported by a star-import of this module; every entry is one of the
# classes imported above.
__all__ = [
    "Vault",
    "Note",
    "WikiLink",
    "Tag",
    "Embed",
    "Section",
]
@@ -0,0 +1,636 @@
1
+ # src/obsidian_parser/dataview/evaluator.py
2
+ """Dataview query evaluation engine."""
3
+
4
+ import re
5
+ from pathlib import Path
6
+ from typing import Any, Optional
7
+
8
+ import pandas as pd
9
+
10
+ from obsidian_parser.models.dataview import DataviewQuery, DataviewQueryType
11
+ from obsidian_parser.note import Note
12
+ from obsidian_parser.vault import Vault
13
+
14
+
15
+ class DataviewEvaluator:
16
+ """Evaluates Dataview queries against a vault."""
17
+
18
+ def __init__(self, vault: Vault):
19
+ """Initialize evaluator with a vault.
20
+
21
+ Args:
22
+ vault: The vault to query against
23
+ """
24
+ self.vault = vault
25
+ self._metadata_cache: dict[str, dict[str, Any]] = {}
26
+
27
+ def evaluate(self, query: DataviewQuery, context_note: Optional[Note] = None) -> pd.DataFrame:
28
+ """Evaluate a Dataview query.
29
+
30
+ Args:
31
+ query: The query to evaluate
32
+ context_note: The note containing the query (for 'this' references)
33
+
34
+ Returns:
35
+ DataFrame with query results
36
+ """
37
+ # Get candidate notes based on FROM clause
38
+ candidates = self._get_candidates(query.from_clause, context_note)
39
+
40
+ # Filter based on WHERE clause
41
+ if query.where_clause:
42
+ candidates = self._apply_where(candidates, query.where_clause, context_note)
43
+
44
+ # Build result data
45
+ if query.query_type == DataviewQueryType.TABLE:
46
+ df = self._build_table(candidates, query)
47
+ elif query.query_type == DataviewQueryType.LIST:
48
+ df = self._build_list(candidates)
49
+ else:
50
+ raise NotImplementedError(f"Query type {query.query_type} not yet supported")
51
+
52
+ # Apply sorting
53
+ if query.sort_clauses and not df.empty:
54
+ for sort in reversed(query.sort_clauses): # Apply in reverse order
55
+ df = self._apply_sort(df, sort)
56
+
57
+ # Apply limit
58
+ if query.limit and len(df) > query.limit:
59
+ df = df.head(query.limit)
60
+
61
+ return df
62
+
63
+ def _get_candidates(self, from_clause: str | None, context_note: Note | None) -> list[Note]:
64
+ """Get candidate notes based on FROM clause.
65
+
66
+ Args:
67
+ from_clause: The FROM clause (e.g., '"Lore/Locations"', '#tag')
68
+ context_note: The note containing the query
69
+
70
+ Returns:
71
+ List of candidate notes
72
+ """
73
+ if not from_clause:
74
+ # No FROM clause means all notes
75
+ return self.vault.notes
76
+
77
+ from_clause = from_clause.strip()
78
+
79
+ # Handle folder paths (quoted strings)
80
+ if from_clause.startswith('"') and from_clause.endswith('"'):
81
+ folder = from_clause[1:-1]
82
+ return self._get_notes_in_folder(folder)
83
+
84
+ # Handle tags
85
+ if from_clause.startswith("#"):
86
+ tag = from_clause[1:]
87
+ return self.vault.get_notes_with_tag(tag)
88
+
89
+ # Handle other patterns (simplified for now)
90
+ return self.vault.notes
91
+
92
+ def _get_notes_in_folder(self, folder: str) -> list[Note]:
93
+ """Get all notes in a specific folder.
94
+
95
+ Args:
96
+ folder: Folder path relative to vault root
97
+
98
+ Returns:
99
+ List of notes in the folder
100
+ """
101
+ folder_path = self.vault.path / folder
102
+ notes = []
103
+
104
+ for note in self.vault.notes:
105
+ # Check if note is in the specified folder
106
+ try:
107
+ note.path.relative_to(folder_path)
108
+ notes.append(note)
109
+ except ValueError:
110
+ # Not in this folder
111
+ continue
112
+
113
+ return notes
114
+
115
+ def _get_metadata(self, note: Note) -> dict[str, Any]:
116
+ """Get all metadata for a note (cached).
117
+
118
+ Args:
119
+ note: The note to get metadata for
120
+
121
+ Returns:
122
+ Dictionary of metadata
123
+ """
124
+ if note.path not in self._metadata_cache:
125
+ metadata = {
126
+ "file": {
127
+ "name": note.name,
128
+ "link": f"[[{note.name}]]", # Add file.link
129
+ "path": str(note.path.relative_to(self.vault.path)),
130
+ "folder": str(note.path.parent.relative_to(self.vault.path)),
131
+ "ext": note.path.suffix,
132
+ "size": note.path.stat().st_size,
133
+ "ctime": note.path.stat().st_ctime,
134
+ "mtime": note.path.stat().st_mtime,
135
+ },
136
+ **note.get_metadata(), # Frontmatter + inline fields
137
+ }
138
+ self._metadata_cache[note.path] = metadata
139
+
140
+ return self._metadata_cache[note.path]
141
+
142
+ def _evaluate_function(self, func_name: str, args_str: str, note: Note, context_note: Note | None) -> bool:
143
+ """Evaluate a Dataview function.
144
+
145
+ Args:
146
+ func_name: The function name (e.g., 'contains')
147
+ args_str: The function arguments as a string
148
+ note: The note being evaluated
149
+ context_note: The note containing the query
150
+
151
+ Returns:
152
+ The function result
153
+ """
154
+ # Parse arguments
155
+ args = [arg.strip() for arg in self._split_function_args(args_str)]
156
+
157
+ if func_name == "contains":
158
+ if len(args) != 2:
159
+ return False
160
+
161
+ # Evaluate both arguments
162
+ container = self._evaluate_expression(args[0], note, context_note)
163
+ value = self._evaluate_expression(args[1], note, context_note)
164
+
165
+ # Handle different container types
166
+ if container is None or value is None:
167
+ return False
168
+
169
+ # Normalize the search value - remove [[ ]] if present
170
+ search_value = str(value).strip()
171
+ search_value_plain = search_value.strip("[]")
172
+ search_value_link = f"[[{search_value_plain}]]"
173
+
174
+ if isinstance(container, str):
175
+ # For strings, check if value is contained (case-insensitive)
176
+ container_lower = container.lower()
177
+ return (
178
+ search_value.lower() in container_lower
179
+ or search_value_plain.lower() in container_lower
180
+ or search_value_link.lower() in container_lower
181
+ )
182
+
183
+ if isinstance(container, list):
184
+ # For lists, check each item
185
+ for item in container:
186
+ if item is None:
187
+ continue
188
+
189
+ item_str = str(item).strip()
190
+ item_lower = item_str.lower()
191
+
192
+ # Check if the item matches any form of the search value
193
+ if (
194
+ item_lower == search_value.lower()
195
+ or item_lower == search_value_plain.lower()
196
+ or item_lower == search_value_link.lower()
197
+ or
198
+ # Also check if the plain value is contained within a wikilink
199
+ (
200
+ item_str.startswith("[[")
201
+ and item_str.endswith("]]")
202
+ and item_str[2:-2].lower() == search_value_plain.lower()
203
+ )
204
+ ):
205
+ return True
206
+
207
+ return False
208
+
209
+ # For other types, convert to string and check
210
+ container_str = str(container).lower()
211
+ return (
212
+ search_value.lower() in container_str
213
+ or search_value_plain.lower() in container_str
214
+ or search_value_link.lower() in container_str
215
+ )
216
+
217
+ # Add more functions as needed
218
+ print(f"Warning: Unknown function '{func_name}'")
219
+ return False
220
+
221
+ def _evaluate_expression(self, expr: str, note: Note, context_note: Note | None) -> Any:
222
+ """Evaluate a Dataview expression.
223
+
224
+ Args:
225
+ expr: The expression to evaluate (e.g., 'file.name', 'this.region')
226
+ note: The note being evaluated
227
+ context_note: The note containing the query
228
+
229
+ Returns:
230
+ The evaluated value
231
+ """
232
+ expr = expr.strip()
233
+ metadata = self._get_metadata(note)
234
+
235
+ # Handle quoted strings
236
+ if (expr.startswith('"') and expr.endswith('"')) or (expr.startswith("'") and expr.endswith("'")):
237
+ return expr[1:-1]
238
+
239
+ # Handle numbers
240
+ try:
241
+ if "." in expr:
242
+ return float(expr)
243
+ return int(expr)
244
+ except ValueError:
245
+ pass
246
+
247
+ # Handle boolean
248
+ if expr.lower() in ("true", "false"):
249
+ return expr.lower() == "true"
250
+
251
+ metadata = self._get_metadata(note)
252
+
253
+ # Handle 'this' references
254
+ if expr.startswith("this.") and context_note:
255
+ this_metadata = self._get_metadata(context_note)
256
+ field = expr[5:] # Remove 'this.'
257
+ return self._get_field_value(this_metadata, field)
258
+
259
+ # Handle file properties
260
+ if expr.startswith("file."):
261
+ field = expr[5:] # Remove 'file.'
262
+ return metadata["file"].get(field)
263
+
264
+ # Direct field reference
265
+ return self._get_field_value(metadata, expr)
266
+
267
+ def _get_field_value(self, metadata: dict[str, Any], field: str) -> Any:
268
+ """Get a field value from metadata, handling nested fields.
269
+
270
+ Args:
271
+ metadata: The metadata dictionary
272
+ field: The field name (may be nested with dots)
273
+
274
+ Returns:
275
+ The field value
276
+ """
277
+ parts = field.split(".")
278
+ value = metadata
279
+
280
+ for part in parts:
281
+ if isinstance(value, dict):
282
+ # Try exact match first
283
+ if part in value:
284
+ value = value.get(part)
285
+ else:
286
+ # Try case-insensitive match
287
+ for key in value.keys():
288
+ if key.lower() == part.lower():
289
+ value = value[key]
290
+ break
291
+ else:
292
+ return None
293
+ else:
294
+ return None
295
+
296
+ return value
297
+
298
+ def _apply_where(self, notes: list[Note], where_clause: str, context_note: Note | None) -> list[Note]:
299
+ """Apply WHERE clause filtering.
300
+
301
+ Args:
302
+ notes: List of notes to filter
303
+ where_clause: The WHERE clause
304
+ context_note: The note containing the query
305
+
306
+ Returns:
307
+ Filtered list of notes
308
+ """
309
+ filtered = []
310
+
311
+ for note in notes:
312
+ if self._evaluate_where_condition(note, where_clause, context_note):
313
+ filtered.append(note)
314
+
315
+ return filtered
316
+
317
+ def _evaluate_where_condition(self, note: Note, condition: str, context_note: Note | None) -> bool:
318
+ """Evaluate a WHERE condition for a note.
319
+
320
+ Args:
321
+ note: The note to evaluate
322
+ condition: The WHERE condition
323
+ context_note: The note containing the query
324
+
325
+ Returns:
326
+ True if condition is met
327
+ """
328
+ condition = condition.strip()
329
+ if condition.startswith("(") and condition.endswith(")"):
330
+ # Check if these are matching outer parentheses
331
+ depth = 0
332
+ for i, char in enumerate(condition):
333
+ if char == "(":
334
+ depth += 1
335
+ elif char == ")":
336
+ depth -= 1
337
+ if depth == 0 and i < len(condition) - 1:
338
+ # Not outer parentheses
339
+ break
340
+ else:
341
+ # These are outer parentheses, remove them
342
+ condition = condition[1:-1].strip()
343
+
344
+ # Handle OR with proper precedence (OR has lower precedence than AND)
345
+ # First, split by OR at the top level (not inside parentheses)
346
+ or_parts = self._split_by_operator(condition, " OR ")
347
+ if len(or_parts) > 1:
348
+ return any(self._evaluate_where_condition(note, part.strip(), context_note) for part in or_parts)
349
+
350
+ # Handle AND at the top level
351
+ and_parts = self._split_by_operator(condition, " AND ")
352
+ if len(and_parts) > 1:
353
+ return all(self._evaluate_where_condition(note, part.strip(), context_note) for part in and_parts)
354
+
355
+ # Handle function calls like contains()
356
+ func_match = re.match(r"(\w+)\s*\((.*)\)", condition.strip())
357
+ if func_match:
358
+ func_name = func_match.group(1)
359
+ args_str = func_match.group(2)
360
+ return self._evaluate_function(func_name, args_str, note, context_note)
361
+
362
+ # Handle != comparison
363
+ if " != " in condition:
364
+ left, right = condition.split(" != ", 1)
365
+ left_val = self._evaluate_expression(left.strip(), note, context_note)
366
+ right_val = self._evaluate_expression(right.strip(), note, context_note)
367
+ return left_val != right_val
368
+
369
+ # Handle = comparison
370
+ if " = " in condition:
371
+ left, right = condition.split(" = ", 1)
372
+ left_val = self._evaluate_expression(left.strip(), note, context_note)
373
+ right_val = self._evaluate_expression(right.strip(), note, context_note)
374
+ return left_val == right_val
375
+
376
+ # If we can't parse the condition, return False (safer than True)
377
+ print(f"Warning: Could not parse WHERE condition: {condition}")
378
+ return False
379
+
380
+ def _split_by_operator(self, text: str, operator: str) -> list[str]:
381
+ """Split text by operator, respecting parentheses.
382
+
383
+ Args:
384
+ text: The text to split
385
+ operator: The operator to split by (e.g., ' AND ', ' OR ')
386
+
387
+ Returns:
388
+ List of parts
389
+ """
390
+ parts = []
391
+ current = []
392
+ paren_depth = 0
393
+ i = 0
394
+
395
+ while i < len(text):
396
+ if text[i] == "(":
397
+ paren_depth += 1
398
+ current.append(text[i])
399
+ elif text[i] == ")":
400
+ paren_depth -= 1
401
+ current.append(text[i])
402
+ elif paren_depth == 0 and text[i : i + len(operator)] == operator:
403
+ # Found operator at top level
404
+ parts.append("".join(current))
405
+ current = []
406
+ i += len(operator) - 1 # Skip the operator
407
+ else:
408
+ current.append(text[i])
409
+ i += 1
410
+
411
+ # Don't forget the last part
412
+ if current:
413
+ parts.append("".join(current))
414
+
415
+ return parts if len(parts) > 1 else [text]
416
+
417
+ def _evaluate_function(self, func_name: str, args_str: str, note: Note, context_note: Note | None) -> bool:
418
+ """Evaluate a Dataview function.
419
+
420
+ Args:
421
+ func_name: The function name (e.g., 'contains')
422
+ args_str: The function arguments as a string
423
+ note: The note being evaluated
424
+ context_note: The note containing the query
425
+
426
+ Returns:
427
+ The function result
428
+ """
429
+ # Parse arguments (simple comma split - doesn't handle nested functions)
430
+ args = [arg.strip() for arg in self._split_function_args(args_str)]
431
+
432
+ if func_name == "contains":
433
+ if len(args) != 2:
434
+ return False
435
+
436
+ # Evaluate both arguments
437
+ container = self._evaluate_expression(args[0], note, context_note)
438
+ value = self._evaluate_expression(args[1], note, context_note)
439
+
440
+ # Handle different container types
441
+ if container is None:
442
+ return False
443
+
444
+ if isinstance(container, str) and isinstance(value, str):
445
+ return value.lower() in container.lower()
446
+
447
+ if isinstance(container, list):
448
+ # Check if value is in the list (case-insensitive for strings)
449
+ return any(
450
+ (isinstance(item, str) and isinstance(value, str) and item.lower() == value.lower())
451
+ or item == value
452
+ for item in container
453
+ )
454
+
455
+ return False
456
+
457
+ # Add more functions as needed
458
+ print(f"Warning: Unknown function '{func_name}'")
459
+ return False
460
+
461
+ def _split_function_args(self, args_str: str) -> list[str]:
462
+ """Split function arguments respecting quotes and parentheses.
463
+
464
+ Args:
465
+ args_str: The arguments string
466
+
467
+ Returns:
468
+ List of argument strings
469
+ """
470
+ args = []
471
+ current = []
472
+ paren_depth = 0
473
+ quote_char = None
474
+
475
+ for char in args_str:
476
+ if quote_char:
477
+ # Inside quotes
478
+ current.append(char)
479
+ if char == quote_char and (not current or current[-2] != "\\"):
480
+ quote_char = None
481
+ elif char in ('"', "'"):
482
+ # Starting quotes
483
+ quote_char = char
484
+ current.append(char)
485
+ elif char == "(":
486
+ paren_depth += 1
487
+ current.append(char)
488
+ elif char == ")":
489
+ paren_depth -= 1
490
+ current.append(char)
491
+ elif char == "," and paren_depth == 0:
492
+ # Argument separator
493
+ args.append("".join(current).strip())
494
+ current = []
495
+ else:
496
+ current.append(char)
497
+
498
+ # Don't forget the last argument
499
+ if current:
500
+ args.append("".join(current).strip())
501
+
502
+ return args
503
+
504
+ def _evaluate_expression(self, expr: str, note: Note, context_note: Note | None) -> Any:
505
+ """Evaluate a Dataview expression.
506
+
507
+ Args:
508
+ expr: The expression to evaluate (e.g., 'file.name', 'this.region')
509
+ note: The note being evaluated
510
+ context_note: The note containing the query
511
+
512
+ Returns:
513
+ The evaluated value
514
+ """
515
+ expr = expr.strip()
516
+
517
+ # Handle quoted strings
518
+ if (expr.startswith('"') and expr.endswith('"')) or (expr.startswith("'") and expr.endswith("'")):
519
+ return expr[1:-1]
520
+
521
+ # Handle numbers
522
+ try:
523
+ if "." in expr:
524
+ return float(expr)
525
+ return int(expr)
526
+ except ValueError:
527
+ pass
528
+
529
+ # Handle boolean
530
+ if expr.lower() in ("true", "false"):
531
+ return expr.lower() == "true"
532
+
533
+ metadata = self._get_metadata(note)
534
+
535
+ # Handle 'this' references
536
+ if expr.startswith("this.") and context_note:
537
+ this_metadata = self._get_metadata(context_note)
538
+ field = expr[5:] # Remove 'this.'
539
+ return self._get_field_value(this_metadata, field)
540
+
541
+ # Handle file properties
542
+ if expr.startswith("file."):
543
+ field = expr[5:] # Remove 'file.'
544
+ return metadata["file"].get(field)
545
+
546
+ # Direct field reference
547
+ return self._get_field_value(metadata, expr)
548
+
549
+ def _build_table(self, notes: list[Note], query: DataviewQuery) -> pd.DataFrame:
550
+ """Build a table from notes based on query fields.
551
+
552
+ Args:
553
+ notes: List of notes to include
554
+ query: The query with field definitions
555
+
556
+ Returns:
557
+ DataFrame with table data
558
+ """
559
+ data = []
560
+
561
+ for note in notes:
562
+ row = {"File": f"[[{note.name}]]"}
563
+
564
+ # Add requested fields
565
+ for field in query.fields:
566
+ value = self._evaluate_expression(field.expression or field.name, note, None)
567
+ row[field.display_name] = value if value is not None else ""
568
+
569
+ data.append(row)
570
+
571
+ return pd.DataFrame(data)
572
+
573
+ def _build_list(self, notes: list[Note]) -> pd.DataFrame:
574
+ """Build a list from notes.
575
+
576
+ Args:
577
+ notes: List of notes to include
578
+
579
+ Returns:
580
+ DataFrame with list data
581
+ """
582
+ data = [{"File": f"[[{note.name}]]"} for note in notes]
583
+ return pd.DataFrame(data)
584
+
585
+ def _apply_sort(self, df: pd.DataFrame, sort) -> pd.DataFrame:
586
+ """Apply sorting to a DataFrame.
587
+
588
+ Args:
589
+ df: The DataFrame to sort
590
+ sort: The sort specification
591
+
592
+ Returns:
593
+ Sorted DataFrame
594
+ """
595
+ # Map field names to column names
596
+ sort_col = sort.field
597
+ if sort.field == "file.name":
598
+ sort_col = "File"
599
+
600
+ if sort_col in df.columns:
601
+ return df.sort_values(by=sort_col, ascending=sort.ascending)
602
+
603
+ return df
604
+
605
+
606
class DebugDataviewEvaluator(DataviewEvaluator):
    """Evaluator variant that prints each filtering and function step to stdout."""

    def _apply_where(self, notes: list[Note], where_clause: str, context_note: Note | None) -> list[Note]:
        """Filter notes by the WHERE clause, printing the verdict for every note.

        Args:
            notes: List of notes to filter
            where_clause: The WHERE clause
            context_note: The note containing the query

        Returns:
            Filtered list of notes
        """
        print(f"\nDEBUG: Applying WHERE clause: {where_clause}")
        context_label = context_note.name if context_note else "None"
        print(f"DEBUG: Context note: {context_label}")

        kept = []
        for candidate in notes:
            if self._evaluate_where_condition(candidate, where_clause, context_note):
                kept.append(candidate)
                print(f" ✓ {candidate.name}: INCLUDED")
            else:
                print(f" ✗ {candidate.name}: EXCLUDED")

        print(f"DEBUG: Filtered from {len(notes)} to {len(kept)} notes")
        return kept

    def _evaluate_function(self, func_name: str, args_str: str, note: Note, context_note: Note | None) -> bool:
        """Evaluate the function via the parent class, then print the call and its result.

        The arguments are split and evaluated a second time here purely so
        their resolved values can be shown in the printed line.

        Args:
            func_name: The function name
            args_str: The function arguments as a string
            note: The note being evaluated
            context_note: The note containing the query

        Returns:
            The result produced by the parent implementation
        """
        outcome = super()._evaluate_function(func_name, args_str, note, context_note)

        shown_args = []
        for raw_arg in self._split_function_args(args_str):
            resolved = self._evaluate_expression(raw_arg.strip(), note, context_note)
            shown_args.append(str(resolved))

        print(f" {func_name}({', '.join(shown_args)}) = {outcome}")
        return outcome
@@ -0,0 +1,16 @@
1
+ from pathlib import Path
2
+
3
+
4
class NoteNotFoundError(Exception):
    """Signals that a requested note could not be found."""
8
+
9
+
10
class AmbiguousNoteError(Exception):
    """Signals that a query matched more than one note.

    Attributes are set in ``__init__``: ``query`` holds the original query
    string and ``matches`` holds the list of matching paths.
    """

    def __init__(self, query: str, matches: list[Path]):
        """Store the query and matches and build the error message.

        Args:
            query: The query that was ambiguous
            matches: Paths of all notes that matched; their stems are listed
                in the message
        """
        self.query = query
        self.matches = matches
        stems = [match.stem for match in matches]
        super().__init__(f"Multiple notes found for '{query}': {stems}")