philoch_bib_sdk-0.3.9-cp313-cp313-macosx_10_12_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. philoch_bib_sdk/__init__.py +0 -0
  2. philoch_bib_sdk/_rust.cpython-313-darwin.so +0 -0
  3. philoch_bib_sdk/adapters/io/__init__.py +115 -0
  4. philoch_bib_sdk/adapters/io/csv/__init__.py +308 -0
  5. philoch_bib_sdk/adapters/io/ods/__init__.py +145 -0
  6. philoch_bib_sdk/adapters/plaintext/bibitem_reader.py +0 -0
  7. philoch_bib_sdk/adapters/tabular_data/read_journal_volume_number_index.py +58 -0
  8. philoch_bib_sdk/converters/latex.py +6 -0
  9. philoch_bib_sdk/converters/plaintext/author/formatter.py +34 -0
  10. philoch_bib_sdk/converters/plaintext/author/parser.py +83 -0
  11. philoch_bib_sdk/converters/plaintext/bib_string_formatter.py +8 -0
  12. philoch_bib_sdk/converters/plaintext/bibitem/bibkey_formatter.py +21 -0
  13. philoch_bib_sdk/converters/plaintext/bibitem/bibkey_parser.py +158 -0
  14. philoch_bib_sdk/converters/plaintext/bibitem/date_formatter.py +37 -0
  15. philoch_bib_sdk/converters/plaintext/bibitem/date_parser.py +62 -0
  16. philoch_bib_sdk/converters/plaintext/bibitem/formatter.py +182 -0
  17. philoch_bib_sdk/converters/plaintext/bibitem/pages_formatter.py +13 -0
  18. philoch_bib_sdk/converters/plaintext/bibitem/pages_parser.py +63 -0
  19. philoch_bib_sdk/converters/plaintext/bibitem/parser.py +415 -0
  20. philoch_bib_sdk/converters/plaintext/journal/formatter.py +25 -0
  21. philoch_bib_sdk/converters/plaintext/journal/parser.py +36 -0
  22. philoch_bib_sdk/converters/plaintext/shared/renderable_formatter.py +25 -0
  23. philoch_bib_sdk/interfaces/cli/__init__.py +3 -0
  24. philoch_bib_sdk/interfaces/cli/fuzzy_matching.py +135 -0
  25. philoch_bib_sdk/logic/__init__.py +39 -0
  26. philoch_bib_sdk/logic/default_models.py +315 -0
  27. philoch_bib_sdk/logic/functions/__init__.py +31 -0
  28. philoch_bib_sdk/logic/functions/comparator.py +414 -0
  29. philoch_bib_sdk/logic/functions/fuzzy_matcher.py +796 -0
  30. philoch_bib_sdk/logic/functions/journal_article_matcher.py +44 -0
  31. philoch_bib_sdk/logic/literals.py +98 -0
  32. philoch_bib_sdk/logic/models.py +366 -0
  33. philoch_bib_sdk/logic/models_staging.py +173 -0
  34. philoch_bib_sdk/procedures/fuzzy_matching.py +112 -0
  35. philoch_bib_sdk/py.typed +0 -0
  36. philoch_bib_sdk/rust_scorer/Cargo.lock +232 -0
  37. philoch_bib_sdk/rust_scorer/Cargo.toml +26 -0
  38. philoch_bib_sdk/rust_scorer/pyproject.toml +15 -0
  39. philoch_bib_sdk/rust_scorer/rust_scorer.pyi +65 -0
  40. philoch_bib_sdk/rust_scorer/src/lib.rs +362 -0
  41. philoch_bib_sdk-0.3.9.dist-info/METADATA +15 -0
  42. philoch_bib_sdk-0.3.9.dist-info/RECORD +44 -0
  43. philoch_bib_sdk-0.3.9.dist-info/WHEEL +4 -0
  44. philoch_bib_sdk-0.3.9.dist-info/licenses/LICENSE +21 -0
philoch_bib_sdk/adapters/io/__init__.py
@@ -0,0 +1,115 @@
+ """IO adapters with automatic format detection.
+
+ This module provides format-agnostic wrappers that detect file formats
+ and delegate to appropriate format-specific adapters.
+ """
+
+ from pathlib import Path
+ from typing import Dict, Tuple
+
+ from aletk.ResultMonad import Err, Ok
+
+ from philoch_bib_sdk.adapters.io.csv import (
+     load_bibliography_csv,
+     load_staged_csv,
+     write_report_csv,
+ )
+ from philoch_bib_sdk.adapters.io.ods import (
+     load_bibliography_ods,
+     load_staged_ods,
+ )
+ from philoch_bib_sdk.logic.models import BibItem
+ from philoch_bib_sdk.logic.models_staging import BibItemStaged
+
+
+ def load_bibliography(filename: str, max_rows: int | None = None) -> Ok[Dict[str, BibItem]] | Err:
+     """Load bibliography with automatic format detection.
+
+     Detects format based on file extension and delegates to appropriate adapter.
+
+     Supported formats:
+     - .csv: CSV format
+     - .ods: OpenDocument Spreadsheet format
+
+     Args:
+         filename: Path to bibliography file
+         max_rows: Optional limit on number of rows (for testing large files)
+
+     Returns:
+         Ok[Dict[str, BibItem]] with bibkey as key, or Err on failure
+     """
+     file_path = Path(filename)
+     suffix = file_path.suffix.lower()
+
+     match suffix:
+         case ".csv":
+             return load_bibliography_csv(filename)
+         case ".ods":
+             return load_bibliography_ods(filename, max_rows=max_rows)
+         case _:
+             return Err(
+                 message=f"Unsupported bibliography format: {suffix}. Supported: .csv, .ods",
+                 code=-1,
+                 error_type="UnsupportedFormatError",
+             )
+
+
+ def load_staged(filename: str, max_rows: int | None = None) -> Ok[Tuple[BibItem, ...]] | Err:
+     """Load staged items with automatic format detection.
+
+     Detects format based on file extension and delegates to appropriate adapter.
+
+     Supported formats:
+     - .csv: CSV format
+     - .ods: OpenDocument Spreadsheet format
+
+     Args:
+         filename: Path to staged items file
+         max_rows: Optional limit on number of rows (for testing large files)
+
+     Returns:
+         Ok[Tuple[BibItem, ...]] or Err on failure
+     """
+     file_path = Path(filename)
+     suffix = file_path.suffix.lower()
+
+     match suffix:
+         case ".csv":
+             return load_staged_csv(filename)
+         case ".ods":
+             return load_staged_ods(filename, max_rows=max_rows)
+         case _:
+             return Err(
+                 message=f"Unsupported staged items format: {suffix}. Supported: .csv, .ods",
+                 code=-1,
+                 error_type="UnsupportedFormatError",
+             )
+
+
+ def write_report(filename: str, staged: Tuple[BibItemStaged, ...], output_format: str = "csv") -> Ok[None] | Err:
+     """Write fuzzy matching report with format selection.
+
+     Args:
+         filename: Path to output file (extension will be added based on format)
+         staged: Tuple of staged items with matches
+         output_format: Output format ("csv", etc.)
+
+     Returns:
+         Ok[None] on success, Err on failure
+     """
+     match output_format.lower():
+         case "csv":
+             return write_report_csv(filename, staged)
+         case _:
+             return Err(
+                 message=f"Unsupported output format: {output_format}. Supported: csv",
+                 code=-1,
+                 error_type="UnsupportedFormatError",
+             )
+
+
+ __all__ = [
+     "load_bibliography",
+     "load_staged",
+     "write_report",
+ ]
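
Editor's note: a minimal usage sketch of the dispatching entry point above (not part of the diff; "library.ods" is a placeholder path, and the isinstance check mirrors how the adapters themselves unwrap aletk results):

    from aletk.ResultMonad import Err

    from philoch_bib_sdk.adapters.io import load_bibliography

    # Dispatch is driven purely by the file suffix; max_rows is only
    # honored by the ODS path in this version.
    result = load_bibliography("library.ods", max_rows=100)
    if isinstance(result, Err):
        print(f"{result.error_type}: {result.message}")
    else:
        bibliography = result.out  # Dict[str, BibItem], keyed by formatted bibkey
        print(f"Loaded {len(bibliography)} items")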
philoch_bib_sdk/adapters/io/csv/__init__.py
@@ -0,0 +1,308 @@
+ """CSV adapters for reading and writing bibliographic data.
+
+ This module provides CSV-specific implementations for loading bibliographies,
+ staged items, and writing fuzzy matching reports.
+ """
+
+ import csv
+ import traceback
+ from pathlib import Path
+ from typing import Any, Dict, Tuple
+
+ from aletk.ResultMonad import Err, Ok
+ from aletk.utils import get_logger
+
+ from philoch_bib_sdk.converters.plaintext.bibitem.bibkey_formatter import format_bibkey
+ from philoch_bib_sdk.converters.plaintext.bibitem.parser import (
+     ParsedBibItemData,
+     parse_bibitem,
+ )
+ from philoch_bib_sdk.logic.models import BibItem
+ from philoch_bib_sdk.logic.models_staging import BibItemStaged
+
+ logger = get_logger(__name__)
+
+
+ def _csv_row_to_parsed_data(row: dict[str, Any]) -> ParsedBibItemData:
+     """Convert CSV row to ParsedBibItemData, filtering empty values.
+
+     Args:
+         row: Dictionary from csv.DictReader
+
+     Returns:
+         ParsedBibItemData with empty values removed
+     """
+     # Filter out empty values and create ParsedBibItemData
+     # TypedDict with total=False allows any subset of fields
+     return {k: v for k, v in row.items() if v}  # type: ignore[return-value]
+
+
+ def load_bibliography_csv(filename: str) -> Ok[Dict[str, BibItem]] | Err:
+     """Load bibliography from CSV file.
+
+     Expected CSV format: Standard CSV with headers matching ParsedBibItemData fields.
+     Required columns: entry_type, author, title, date
+     Optional columns: journal, volume, number, pages, doi, etc.
+
+     Args:
+         filename: Path to CSV file
+
+     Returns:
+         Ok[Dict[str, BibItem]] with bibkey as key, or Err on failure
+     """
+     try:
+         file_path = Path(filename)
+         if not file_path.exists():
+             return Err(
+                 message=f"File not found: {filename}",
+                 code=-1,
+                 error_type="FileNotFoundError",
+             )
+
+         bibliography: Dict[str, BibItem] = {}
+         errors = []
+
+         with open(file_path, "r", encoding="utf-8") as f:
+             reader = csv.DictReader(f)
+
+             if reader.fieldnames is None:
+                 return Err(
+                     message=f"CSV file has no headers: {filename}",
+                     code=-1,
+                     error_type="CSVFormatError",
+                 )
+
+             for row_num, row in enumerate(reader, start=2):  # Start at 2 (header is row 1)
+                 # Convert CSV row to ParsedBibItemData
+                 parsed_data = _csv_row_to_parsed_data(row)
+
+                 # Parse the row into a BibItem
+                 parse_result = parse_bibitem(parsed_data, bibstring_type="simplified")
+
+                 if isinstance(parse_result, Err):
+                     errors.append(f"Row {row_num}: {parse_result.message}")
+                     continue
+
+                 bibitem = parse_result.out
+                 bibkey = format_bibkey(bibitem.bibkey)
+
+                 # Check for duplicate bibkeys
+                 if bibkey in bibliography:
+                     errors.append(f"Row {row_num}: Duplicate bibkey '{bibkey}' (first seen in earlier row)")
+                     continue
+
+                 bibliography[bibkey] = bibitem
+
+         # Report results
+         if errors:
+             error_summary = f"Loaded {len(bibliography)} items with {len(errors)} errors:\n" + "\n".join(
+                 errors[:10]  # Show first 10 errors
+             )
+             if len(errors) > 10:
+                 error_summary += f"\n... and {len(errors) - 10} more errors"
+
+             logger.warning(error_summary)
+
+         if not bibliography:
+             return Err(
+                 message=f"No valid items loaded from {filename}. Errors: {len(errors)}",
+                 code=-1,
+                 error_type="EmptyBibliographyError",
+             )
+
+         logger.info(f"Successfully loaded {len(bibliography)} items from {filename}")
+         return Ok(bibliography)
+
+     except Exception as e:
+         return Err(
+             message=f"Failed to load bibliography from {filename}: {e.__class__.__name__}: {e}",
+             code=-1,
+             error_type=e.__class__.__name__,
+             error_trace=traceback.format_exc(),
+         )
+
+
+ def load_staged_csv_allow_empty_bibkeys(filename: str) -> Ok[Tuple[BibItem, ...]] | Err:
+     """Load staged items from CSV file, allowing empty bibkeys.
+
+     This is useful for staging files where bibkeys haven't been assigned yet.
+     Items without bibkeys will be assigned temporary sequential keys.
+
+     Args:
+         filename: Path to CSV file
+
+     Returns:
+         Ok[Tuple[BibItem, ...]] or Err on failure
+     """
+     try:
+         file_path = Path(filename)
+
+         if not file_path.exists():
+             return Err(
+                 message=f"File not found: {filename}",
+                 code=-1,
+                 error_type="FileNotFoundError",
+             )
+
+         staged_items: list[BibItem] = []
+         errors = []
+
+         with open(file_path, "r", encoding="utf-8") as f:
+             reader = csv.DictReader(f)
+
+             if reader.fieldnames is None:
+                 return Err(
+                     message=f"CSV file has no headers: {filename}",
+                     code=-1,
+                     error_type="CSVFormatError",
+                 )
+
+             for row_num, row in enumerate(reader, start=2):  # Start at 2 (header is row 1)
+                 # Convert CSV row to ParsedBibItemData
+                 parsed_data = _csv_row_to_parsed_data(row)
+
+                 # If bibkey is empty, assign a temporary one
+                 if not parsed_data.get("bibkey"):
+                     parsed_data["bibkey"] = f"temp:{row_num}"
+
+                 # Parse the row into a BibItem
+                 parse_result = parse_bibitem(parsed_data, bibstring_type="simplified")
+
+                 if isinstance(parse_result, Err):
+                     errors.append(f"Row {row_num}: {parse_result.message}")
+                     continue
+
+                 bibitem = parse_result.out
+                 staged_items.append(bibitem)
+
+         # Report results
+         if errors:
+             error_summary = f"Loaded {len(staged_items)} items with {len(errors)} errors:\n" + "\n".join(
+                 errors[:10]  # Show first 10 errors
+             )
+             if len(errors) > 10:
+                 error_summary += f"\n... and {len(errors) - 10} more errors"
+
+             logger.warning(error_summary)
+
+         if not staged_items:
+             return Err(
+                 message=f"No valid items loaded from {filename}. Errors: {len(errors)}",
+                 code=-1,
+                 error_type="EmptyFileError",
+             )
+
+         logger.info(f"Successfully loaded {len(staged_items)} staged items from {filename}")
+
+         return Ok(tuple(staged_items))
+
+     except Exception as e:
+         return Err(
+             message=f"Failed to load staged items from {filename}: {e.__class__.__name__}: {e}",
+             code=-1,
+             error_type=e.__class__.__name__,
+             error_trace=traceback.format_exc(),
+         )
+
+
+ def load_staged_csv(filename: str) -> Ok[Tuple[BibItem, ...]] | Err:
+     """Load staged items from CSV file.
+
+     Uses the same format as load_bibliography_csv - standard CSV with ParsedBibItemData fields.
+     Additional score-related columns (if present) are ignored during loading.
+
+     Args:
+         filename: Path to CSV file
+
+     Returns:
+         Ok[Tuple[BibItem, ...]] or Err on failure
+     """
+     try:
+         # Load as bibliography first
+         result = load_bibliography_csv(filename)
+
+         if isinstance(result, Err):
+             return result
+
+         bibliography = result.out
+
+         # Convert dict values to tuple
+         staged_items = tuple(bibliography.values())
+
+         logger.info(f"Successfully loaded {len(staged_items)} staged items from {filename}")
+         return Ok(staged_items)
+
+     except Exception as e:
+         return Err(
+             message=f"Failed to load staged items from {filename}: {e.__class__.__name__}: {e}",
+             code=-1,
+             error_type=e.__class__.__name__,
+             error_trace=traceback.format_exc(),
+         )
+
+
+ def write_report_csv(filename: str, staged: Tuple[BibItemStaged, ...]) -> Ok[None] | Err:
+     """Write fuzzy matching report to CSV file.
+
+     Output format: Uses BibItemStaged.to_csv_row() with columns:
+     - staged_bibkey, staged_title, staged_author, staged_year
+     - num_matches, best_match_score, best_match_bibkey
+     - top_matches_json (JSON-encoded match details)
+     - search_time_ms, candidates_searched
+
+     Args:
+         filename: Path to output CSV file (without extension)
+         staged: Tuple of staged items with matches
+
+     Returns:
+         Ok[None] on success, Err on failure
+     """
+     try:
+         # Add .csv extension if not present
+         output_path = Path(filename)
+         if output_path.suffix != ".csv":
+             output_path = output_path.with_suffix(".csv")
+
+         if not staged:
+             logger.warning("No staged items to write")
+             # Create empty file with headers
+             with open(output_path, "w", encoding="utf-8", newline="") as f:
+                 simple_writer = csv.writer(f)
+                 simple_writer.writerow(
+                     [
+                         "staged_bibkey",
+                         "staged_title",
+                         "staged_author",
+                         "staged_year",
+                         "num_matches",
+                         "best_match_score",
+                         "best_match_bibkey",
+                         "top_matches_json",
+                         "search_time_ms",
+                         "candidates_searched",
+                     ]
+                 )
+             logger.info(f"Created empty report at {output_path}")
+             return Ok(None)
+
+         # Convert to CSV rows
+         rows = tuple(item.to_csv_row() for item in staged)
+
+         # Write to CSV
+         with open(output_path, "w", encoding="utf-8", newline="") as f:
+             if rows:
+                 fieldnames = list(rows[0].keys())
+                 dict_writer = csv.DictWriter(f, fieldnames=fieldnames)
+                 dict_writer.writeheader()
+                 dict_writer.writerows(rows)
+
+         logger.info(f"Successfully wrote {len(staged)} items to {output_path}")
+         return Ok(None)
+
+     except Exception as e:
+         return Err(
+             message=f"Failed to write report to {filename}: {e.__class__.__name__}: {e}",
+             code=-1,
+             error_type=e.__class__.__name__,
+             error_trace=traceback.format_exc(),
+         )
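
Editor's note: a hedged sketch of the empty-bibkey staging path (not part of the diff; "staging.csv" is a placeholder). Rows with a blank bibkey column receive temporary "temp:<row number>" keys, as the loader above documents:

    from aletk.ResultMonad import Err

    from philoch_bib_sdk.adapters.io.csv import load_staged_csv_allow_empty_bibkeys

    # "staging.csv" is hypothetical; parse failures are collected per row
    # and only surfaced as an Err if no row parses at all.
    result = load_staged_csv_allow_empty_bibkeys("staging.csv")
    if isinstance(result, Err):
        raise RuntimeError(result.message)

    staged = result.out  # Tuple[BibItem, ...]
    print(f"{len(staged)} staged items loaded")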
philoch_bib_sdk/adapters/io/ods/__init__.py
@@ -0,0 +1,145 @@
+ """ODS (OpenDocument Spreadsheet) adapters for bibliography I/O operations."""
+
+ import traceback
+ from typing import Dict, Tuple, Any
+ from pathlib import Path
+
+ from aletk.ResultMonad import Ok, Err
+ from aletk.utils import get_logger
+
+ from philoch_bib_sdk.logic.models import BibItem
+ from philoch_bib_sdk.converters.plaintext.bibitem.parser import parse_bibitem, ParsedBibItemData
+ from philoch_bib_sdk.converters.plaintext.bibitem.bibkey_formatter import format_bibkey
+
+ lgr = get_logger(__name__)
+
+ __all__: list[str] = [
+     "load_bibliography_ods",
+     "load_staged_ods",
+ ]
+
+
+ def _normalize_column_name(name: str) -> str:
+     """
+     Normalize column names from ODS to match ParsedBibItemData keys.
+
+     ODS columns use hyphens (e.g., 'journal-id') while ParsedBibItemData uses underscores.
+     """
+     return name.replace("-", "_")
+
+
+ def _ods_row_to_parsed_data(row: dict[str, Any]) -> ParsedBibItemData:
+     """
+     Convert an ODS row (dict) to ParsedBibItemData.
+
+     This helper exists to isolate the type: ignore directive.
+     Polars returns dict[str, Any] while ParsedBibItemData is a TypedDict.
+     We filter out None/empty values and normalize column names.
+     """
+     normalized = {_normalize_column_name(k): str(v) if v is not None else "" for k, v in row.items()}
+     return {k: v for k, v in normalized.items() if v}  # type: ignore[return-value]
+
+
+ def load_bibliography_ods(
+     filename: str, max_rows: int | None = None, bibstring_type: str = "simplified"
+ ) -> Ok[Dict[str, BibItem]] | Err:
+     """
+     Load a bibliography from an ODS file.
+
+     Args:
+         filename: Path to the ODS file
+         max_rows: Optional limit on number of rows to read (for testing)
+         bibstring_type: Type of bibstring to use ('simplified', 'latex', 'unicode')
+
+     Returns:
+         Ok with dict mapping bibkey -> BibItem, or Err with details
+     """
+     try:
+         import polars as pl
+
+         if not Path(filename).exists():
+             return Err(message=f"File not found: {filename}", code=1)
+
+         # Read ODS file
+         df = pl.read_ods(source=filename, has_header=True)
+
+         if df.is_empty():
+             return Err(message=f"ODS file is empty: {filename}", code=1)
+
+         # Limit rows if requested
+         if max_rows is not None:
+             df = df.head(max_rows)
+
+         # Convert to list of dicts
+         rows = df.to_dicts()
+
+         bibliography: dict[str, BibItem] = {}
+         errors: list[str] = []
+         seen_bibkeys: dict[str, int] = {}
+
+         for i, row in enumerate(rows, start=2):  # Start at 2 because row 1 is header
+             try:
+                 parsed_data = _ods_row_to_parsed_data(row)
+                 result = parse_bibitem(parsed_data, bibstring_type=bibstring_type)  # type: ignore[arg-type]
+
+                 if isinstance(result, Err):
+                     errors.append(f"Row {i}: {result.message}")
+                     continue
+
+                 bibitem = result.out
+                 bibkey_str = format_bibkey(bibitem.bibkey)
+
+                 # Check for duplicates
+                 if bibkey_str in seen_bibkeys:
+                     first_row = seen_bibkeys[bibkey_str]
+                     errors.append(f"Row {i}: Duplicate bibkey '{bibkey_str}' (first seen in row {first_row})")
+                     continue
+
+                 bibliography[bibkey_str] = bibitem
+                 seen_bibkeys[bibkey_str] = i
+
+             except Exception as e:
+                 errors.append(f"Row {i}: Unexpected error: {e}")
+                 continue
+
+         if errors:
+             lgr.warning(f"Loaded {len(bibliography)} items with {len(errors)} errors:\n" + "\n".join(errors[:10]))
+
+         if not bibliography:
+             error_summary = "\n".join(errors[:5])
+             return Err(message=f"No valid items loaded from {filename}. Errors: {len(errors)}\n{error_summary}", code=1)
+
+         lgr.info(f"Successfully loaded {len(bibliography)} items from {filename}")
+         return Ok(bibliography)
+
+     except Exception as e:
+         return Err(
+             message=f"Failed to load bibliography from {filename}: {e.__class__.__name__}: {e}",
+             code=-1,
+             error_type=e.__class__.__name__,
+             error_trace=traceback.format_exc(),
+         )
+
+
+ def load_staged_ods(filename: str, max_rows: int | None = None) -> Ok[Tuple[BibItem, ...]] | Err:
+     """
+     Load staged BibItems from an ODS file.
+
+     Args:
+         filename: Path to the ODS file
+         max_rows: Optional limit on number of rows to read (for testing)
+
+     Returns:
+         Ok with tuple of BibItems, or Err with details
+     """
+     result = load_bibliography_ods(filename, max_rows=max_rows)
+
+     if isinstance(result, Err):
+         return result
+
+     bibliography = result.out
+     staged_items = tuple(bibliography.values())
+
+     lgr.info(f"Successfully loaded {len(staged_items)} staged items from {filename}")
+
+     return Ok(staged_items)
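
Editor's note: a small sketch of the ODS path (not part of the diff; "inbox.ods" is an assumed file name). max_rows caps reading for quick smoke tests, and hyphenated ODS headers such as "journal-id" are normalized to underscores before parsing:

    from aletk.ResultMonad import Ok

    from philoch_bib_sdk.adapters.io.ods import load_bibliography_ods

    # "inbox.ods" is hypothetical; only the first 50 data rows are parsed.
    result = load_bibliography_ods("inbox.ods", max_rows=50)
    if isinstance(result, Ok):
        for bibkey, item in result.out.items():
            print(bibkey)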
philoch_bib_sdk/adapters/tabular_data/read_journal_volume_number_index.py
@@ -0,0 +1,58 @@
+ from functools import partial
+ from typing import Callable, NamedTuple
+
+ import polars as pl
+
+ from philoch_bib_sdk.converters.plaintext.bibitem.bibkey_parser import hard_parse_bibkey
+ from philoch_bib_sdk.logic.functions.journal_article_matcher import (
+     TJournalBibkeyIndex,
+     TReadIndex,
+ )
+
+
+ class ColumnNames(NamedTuple):
+     bibkey: str
+     journal: str
+     volume: str
+     number: str
+
+
+ def _read_from_ods(
+     column_names: ColumnNames,
+     file_path: str,
+ ) -> TJournalBibkeyIndex:
+     """
+     Reads the specified columns from an ODS file and returns a TJournalBibkeyIndex dictionary.
+     Args:
+         column_names (ColumnNames): The names of the columns to read (journal, volume, number, bibkey).
+         file_path (str): The path to the ODS file.
+     Returns:
+         TJournalBibkeyIndex: A dictionary mapping (journal, volume, number) tuples to bibkey values.
+     """
+     df = pl.read_ods(
+         source=file_path,
+         has_header=True,
+         columns=[column_names.journal, column_names.volume, column_names.number, column_names.bibkey],
+         schema_overrides={
+             column_names.journal: pl.Utf8,
+             column_names.volume: pl.Utf8,
+             column_names.number: pl.Utf8,
+             column_names.bibkey: pl.Utf8,
+         },
+     )
+
+     if df.is_empty():
+         raise ValueError(
+             f"Tabular data at '{file_path}' is empty or does not contain the expected columns: {column_names}"
+         )
+
+     return {
+         (row[column_names.journal], row[column_names.volume], row[column_names.number]): hard_parse_bibkey(
+             row[column_names.bibkey]
+         )
+         for row in df.to_dicts()
+     }
+
+
+ type THOFReadFromOds = Callable[[ColumnNames], TReadIndex]
+ hof_read_from_ods: THOFReadFromOds = lambda column_names: partial(_read_from_ods, column_names)
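
Editor's note: the higher-order wrapper is meant to be partially applied, binding ColumnNames first to yield a TReadIndex that only needs a file path. A sketch with illustrative column names and path (both assumptions, not from the diff):

    from philoch_bib_sdk.adapters.tabular_data.read_journal_volume_number_index import (
        ColumnNames,
        hof_read_from_ods,
    )

    # Column names and "journal_index.ods" are placeholders for illustration.
    read_index = hof_read_from_ods(
        ColumnNames(bibkey="bibkey", journal="journal", volume="volume", number="number")
    )
    index = read_index("journal_index.ods")
    # index maps (journal, volume, number) -> parsed bibkey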
philoch_bib_sdk/converters/latex.py
@@ -0,0 +1,6 @@
+ def unicode_to_latex(unicode_str: str) -> str:
+     raise NotImplementedError("This function is not implemented yet.")
+
+
+ def latex_to_unicode(latex_str: str) -> str:
+     raise NotImplementedError("This function is not implemented yet.")
philoch_bib_sdk/converters/plaintext/author/formatter.py
@@ -0,0 +1,34 @@
+ from typing import Tuple
+ from aletk.utils import get_logger
+ from philoch_bib_sdk.logic.models import Author, TBibString
+
+ lgr = get_logger(__name__)
+
+
+ def _full_name_generic(given_name: str, family_name: str, mononym: str) -> str:
+     if mononym:
+         return mononym
+
+     if not given_name and family_name:
+         return family_name
+
+     if not given_name:
+         return ""
+
+     if not family_name:
+         return given_name
+
+     return f"{family_name}, {given_name}"
+
+
+ def _format_single(author: Author, bibstring_type: TBibString) -> str:
+     given_name = f"{getattr(author.given_name, bibstring_type)}"
+     family_name = f"{getattr(author.family_name, bibstring_type)}"
+     mononym = f"{getattr(author.mononym, bibstring_type)}"
+
+     return _full_name_generic(given_name, family_name, mononym)
+
+
+ def format_author(authors: Tuple[Author, ...], bibstring_type: TBibString) -> str:
+     names = (_format_single(author, bibstring_type=bibstring_type) for author in authors)
+     return " and ".join(name for name in names if name)