philoch-bib-sdk 0.3.9__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. philoch_bib_sdk/__init__.py +0 -0
  2. philoch_bib_sdk/_rust.cp313-win_amd64.pyd +0 -0
  3. philoch_bib_sdk/adapters/io/__init__.py +115 -0
  4. philoch_bib_sdk/adapters/io/csv/__init__.py +308 -0
  5. philoch_bib_sdk/adapters/io/ods/__init__.py +145 -0
  6. philoch_bib_sdk/adapters/plaintext/bibitem_reader.py +0 -0
  7. philoch_bib_sdk/adapters/tabular_data/read_journal_volume_number_index.py +58 -0
  8. philoch_bib_sdk/converters/latex.py +6 -0
  9. philoch_bib_sdk/converters/plaintext/author/formatter.py +34 -0
  10. philoch_bib_sdk/converters/plaintext/author/parser.py +83 -0
  11. philoch_bib_sdk/converters/plaintext/bib_string_formatter.py +8 -0
  12. philoch_bib_sdk/converters/plaintext/bibitem/bibkey_formatter.py +21 -0
  13. philoch_bib_sdk/converters/plaintext/bibitem/bibkey_parser.py +158 -0
  14. philoch_bib_sdk/converters/plaintext/bibitem/date_formatter.py +37 -0
  15. philoch_bib_sdk/converters/plaintext/bibitem/date_parser.py +62 -0
  16. philoch_bib_sdk/converters/plaintext/bibitem/formatter.py +182 -0
  17. philoch_bib_sdk/converters/plaintext/bibitem/pages_formatter.py +13 -0
  18. philoch_bib_sdk/converters/plaintext/bibitem/pages_parser.py +63 -0
  19. philoch_bib_sdk/converters/plaintext/bibitem/parser.py +415 -0
  20. philoch_bib_sdk/converters/plaintext/journal/formatter.py +25 -0
  21. philoch_bib_sdk/converters/plaintext/journal/parser.py +36 -0
  22. philoch_bib_sdk/converters/plaintext/shared/renderable_formatter.py +25 -0
  23. philoch_bib_sdk/interfaces/cli/__init__.py +3 -0
  24. philoch_bib_sdk/interfaces/cli/fuzzy_matching.py +135 -0
  25. philoch_bib_sdk/logic/__init__.py +39 -0
  26. philoch_bib_sdk/logic/default_models.py +315 -0
  27. philoch_bib_sdk/logic/functions/__init__.py +31 -0
  28. philoch_bib_sdk/logic/functions/comparator.py +414 -0
  29. philoch_bib_sdk/logic/functions/fuzzy_matcher.py +796 -0
  30. philoch_bib_sdk/logic/functions/journal_article_matcher.py +44 -0
  31. philoch_bib_sdk/logic/literals.py +98 -0
  32. philoch_bib_sdk/logic/models.py +366 -0
  33. philoch_bib_sdk/logic/models_staging.py +173 -0
  34. philoch_bib_sdk/procedures/fuzzy_matching.py +112 -0
  35. philoch_bib_sdk/py.typed +0 -0
  36. philoch_bib_sdk/rust_scorer/Cargo.lock +232 -0
  37. philoch_bib_sdk/rust_scorer/Cargo.toml +26 -0
  38. philoch_bib_sdk/rust_scorer/pyproject.toml +15 -0
  39. philoch_bib_sdk/rust_scorer/rust_scorer.pyi +65 -0
  40. philoch_bib_sdk/rust_scorer/src/lib.rs +362 -0
  41. philoch_bib_sdk-0.3.9.dist-info/METADATA +15 -0
  42. philoch_bib_sdk-0.3.9.dist-info/RECORD +44 -0
  43. philoch_bib_sdk-0.3.9.dist-info/WHEEL +4 -0
  44. philoch_bib_sdk-0.3.9.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,415 @@
1
+ import traceback
2
+ from typing import Tuple, Literal, TypedDict, TypeGuard, Any
3
+ from aletk.ResultMonad import Ok, Err
4
+ from aletk.utils import get_logger, remove_extra_whitespace
5
+ from philoch_bib_sdk.converters.plaintext.author.parser import parse_author
6
+ from philoch_bib_sdk.converters.plaintext.bibitem.bibkey_parser import parse_bibkey
7
+ from philoch_bib_sdk.converters.plaintext.bibitem.date_parser import parse_date
8
+ from philoch_bib_sdk.converters.plaintext.bibitem.pages_parser import parse_pages
9
+ from philoch_bib_sdk.converters.plaintext.journal.parser import parse_journal
10
+ from philoch_bib_sdk.logic.literals import (
11
+ TBibTeXEntryType,
12
+ TPubState,
13
+ TEpoch,
14
+ TLanguageID,
15
+ BIBTEX_ENTRY_TYPE_VALUES,
16
+ PUB_STATE_VALUES,
17
+ EPOCH_VALUES,
18
+ LANGUAGE_ID_VALUES,
19
+ )
20
+ from philoch_bib_sdk.logic.models import (
21
+ BibItem,
22
+ BibStringAttr,
23
+ BibKeyAttr,
24
+ Author,
25
+ PageAttr,
26
+ BibItemDateAttr,
27
+ BaseNamedRenderable,
28
+ KeywordsAttr,
29
+ Keyword,
30
+ TBibString,
31
+ )
32
+
33
+ lgr = get_logger(__name__)
34
+
35
+
36
class ParsedBibItemData(TypedDict, total=False):
    """
    Raw, string-valued input record accepted by ``parse_bibitem``.

    Every value is an unparsed ``str``. The dict is declared with ``total=False``,
    so every key is optional; ``parse_bibitem`` reads keys with ``.get`` and
    treats a missing or empty value as "not provided".
    """

    _to_do_general: str
    _change_request: str
    entry_type: str  # normalised by parse_entry_type
    bibkey: str  # parsed by parse_bibkey
    author: str  # parsed by parse_author
    _author_ids: str  # declared here but not read by parse_bibitem in this module
    editor: str  # parsed by parse_author
    _editor_ids: str  # declared here but not read by parse_bibitem in this module
    author_ids: str  # declared here but not read by parse_bibitem in this module
    options: str  # comma-separated; split by parse_options
    shorthand: str  # declared here but not read by parse_bibitem in this module
    date: str  # parsed by parse_date
    pubstate: str  # validated by parse_pubstate
    title: str
    _title_unicode: str  # declared here but not read by parse_bibitem in this module
    booktitle: str
    crossref: str  # not read by parse_bibitem, which always sets crossref=""
    journal: str  # parsed by parse_journal
    journal_id: str  # declared here but not read by parse_bibitem in this module
    volume: str
    number: str
    pages: str  # parsed by parse_pages
    eid: str
    series: str
    address: str
    institution: str
    school: str
    publisher: str
    publisher_id: str  # declared here but not read by parse_bibitem in this module
    type: str
    edition: str  # converted with int() by parse_bibitem
    note: str
    _issuetitle: str
    _guesteditor: str  # parsed by parse_author
    _extra_note: str
    urn: str
    eprint: str
    doi: str
    url: str
    _kw_level1: str  # the three _kw_level* keys are combined by parse_keywords
    _kw_level2: str
    _kw_level3: str
    _epoch: str  # validated by parse_epoch
    _person: str  # parsed by parse_author; only the first parsed entry is kept
    _comm_for_profile_bib: str
    _langid: str  # validated by parse_language_id
    _lang_der: str
    _further_refs: str  # comma-separated bibkeys; parsed by parse_bibkey_list
    _depends_on: str  # comma-separated bibkeys; parsed by parse_bibkey_list
    _dltc_num: str  # converted with int() by parse_bibitem
    _spec_interest: str
    _note_perso: str
    _note_stock: str
    _note_status: str
    _num_inwork_coll: str  # converted with int() by parse_bibitem
    _num_inwork: str  # passed through as a string (no int conversion)
    _num_coll: str  # converted with int() by parse_bibitem
    _dltc_copyediting_note: str
    _note_missing: str
    _num_sort: str  # converted with int() by parse_bibitem
97
+
98
+
99
def _is_valid_bibtex_entry_type(value: Any) -> TypeGuard[TBibTeXEntryType]:
    """
    Narrow ``value`` to ``TBibTeXEntryType``.

    Returns True only when ``value`` is a ``str`` that is contained in
    ``BIBTEX_ENTRY_TYPE_VALUES``.
    """
    if not isinstance(value, str):
        return False
    return value in BIBTEX_ENTRY_TYPE_VALUES
104
+
105
+
106
def parse_entry_type(text: str) -> TBibTeXEntryType:
    """
    Normalise a raw entry-type string to a known BibTeX entry type.

    The input is stripped, has its spaces removed, is lowercased, and has any
    ``@``, ``{`` and ``}`` characters dropped. Anything that is empty, already
    ``"UNKNOWN"``, or not a recognised entry type after that normalisation
    yields ``"UNKNOWN"``.
    """
    if text in ("", "UNKNOWN"):
        return "UNKNOWN"

    normalized = text.strip().replace(" ", "").lower()
    for marker in ("@", "{", "}"):
        normalized = normalized.replace(marker, "")

    return normalized if _is_valid_bibtex_entry_type(normalized) else "UNKNOWN"
119
+
120
+
121
def parse_options(text: str) -> Tuple[str, ...]:
    """
    Split a comma-separated option string into a tuple of options.

    Segments that are empty or whitespace-only are dropped; every remaining
    segment is passed through ``remove_extra_whitespace``. A falsy ``text``
    yields an empty tuple.
    """
    if not text:
        return ()

    options = []
    for segment in text.split(","):
        if segment.strip():
            options.append(remove_extra_whitespace(segment))
    return tuple(options)
128
+
129
+
130
def parse_bibkey_list(text: str) -> Tuple[BibKeyAttr, ...]:
    """
    Parse a comma-separated string of bibkeys into a tuple of ``BibKeyAttr``.

    Each segment is cleaned with ``remove_extra_whitespace``; segments that are
    empty afterwards are ignored. A falsy ``text`` yields an empty tuple.

    Raises:
        ValueError: if ``parse_bibkey`` does not return ``Ok`` for a segment.
    """
    if not text:
        return ()

    parsed = []
    for raw_key in text.split(","):
        key_text = remove_extra_whitespace(raw_key)
        if not key_text:
            continue

        outcome = parse_bibkey(key_text)
        if not isinstance(outcome, Ok):
            raise ValueError(f"Failed to parse bibkey '{key_text}': {outcome.message}")
        parsed.append(outcome.out)

    return tuple(parsed)
148
+
149
+
150
def _clean_keyword(text: str) -> str:
    """
    Clean a keyword: apply ``remove_extra_whitespace``, then drop every comma
    and semicolon from the result.
    """
    cleaned = remove_extra_whitespace(text)
    for separator in (",", ";"):
        cleaned = cleaned.replace(separator, "")
    return cleaned
155
+
156
+
157
def parse_keywords(level1: str, level2: str, level3: str) -> KeywordsAttr | None:
    """
    Build a ``KeywordsAttr`` from three keyword-level strings.

    Returns ``None`` when all three levels are falsy. Otherwise each non-empty
    level is cleaned with ``_clean_keyword`` and each empty level becomes a
    ``Keyword`` with an empty name; all keywords are created with ``id=None``.
    """
    if not (level1 or level2 or level3):
        return None

    first, second, third = (
        Keyword(name=_clean_keyword(raw) if raw else "", id=None) for raw in (level1, level2, level3)
    )
    return KeywordsAttr(level_1=first, level_2=second, level_3=third)
169
+
170
+
171
def _is_valid_pubstate(value: Any) -> TypeGuard[TPubState]:
    """
    Narrow ``value`` to ``TPubState``.

    Returns True only when ``value`` is a ``str`` that is contained in
    ``PUB_STATE_VALUES``.
    """
    if not isinstance(value, str):
        return False
    return value in PUB_STATE_VALUES
176
+
177
+
178
def parse_pubstate(text: str) -> TPubState:
    """
    Return ``text`` unchanged when it is a valid publication state, else ``""``.

    The value is checked as-is (no stripping or case folding).
    """
    return text if _is_valid_pubstate(text) else ""
186
+
187
+
188
def _is_valid_epoch(value: Any) -> TypeGuard[TEpoch]:
    """
    Narrow ``value`` to ``TEpoch``.

    Returns True only when ``value`` is a ``str`` that is contained in
    ``EPOCH_VALUES``.
    """
    if not isinstance(value, str):
        return False
    return value in EPOCH_VALUES
193
+
194
+
195
def parse_epoch(text: str) -> TEpoch:
    """
    Return ``text`` unchanged when it is a valid epoch, else ``""``.

    The value is checked as-is (no stripping or case folding).
    """
    return text if _is_valid_epoch(text) else ""
203
+
204
+
205
def _is_valid_language_id(value: Any) -> TypeGuard[TLanguageID]:
    """
    Narrow ``value`` to ``TLanguageID``.

    Returns True only when ``value`` is a ``str`` that is contained in
    ``LANGUAGE_ID_VALUES``.
    """
    if not isinstance(value, str):
        return False
    return value in LANGUAGE_ID_VALUES
210
+
211
+
212
def parse_language_id(text: str) -> TLanguageID:
    """
    Return ``text`` unchanged when it is a valid language ID, else ``""``.

    The value is checked as-is (no stripping or case folding).
    """
    return text if _is_valid_language_id(text) else ""
220
+
221
+
222
+ def _create_bibstring_attr(value: str, bibstring_type: TBibString) -> BibStringAttr:
223
+ """
224
+ Create a BibStringAttr with the correct field set based on bibstring_type.
225
+ """
226
+ if bibstring_type == "latex":
227
+ return BibStringAttr(latex=value)
228
+ elif bibstring_type == "unicode":
229
+ return BibStringAttr(unicode=value)
230
+ else: # simplified
231
+ return BibStringAttr(simplified=value)
232
+
233
+
234
def _parse_optional_int(raw: str | None, field_name: str) -> int | None:
    """
    Convert an optional numeric string field to ``int``.

    Returns ``None`` when ``raw`` is missing, empty, or whitespace-only.

    Raises:
        ValueError: if the stripped value is not an integer literal. The message
            names ``field_name`` so the failing column can be identified.
    """
    if not raw:
        return None

    stripped = raw.strip()
    if not stripped:
        return None

    try:
        return int(stripped)
    except ValueError as exc:
        raise ValueError(f"Field '{field_name}' is not a valid integer: {raw!r}") from exc


def _optional_bibstring(raw: str | None, bibstring_type: TBibString) -> BibStringAttr | Literal[""]:
    """
    Wrap a non-empty string with ``_create_bibstring_attr``; return ``""`` when
    ``raw`` is missing or empty.
    """
    return _create_bibstring_attr(raw, bibstring_type) if raw else ""


def parse_bibitem(data: ParsedBibItemData, bibstring_type: TBibString = "latex") -> Ok[BibItem] | Err:
    """
    Parse a bibitem from a dictionary of string fields into a BibItem object.

    Args:
        data: Raw string fields. Missing or empty keys are treated as absent.
        bibstring_type: Selects which ``BibStringAttr`` field receives textual
            values, and is forwarded to the author and journal parsers.

    Returns:
        ``Ok(BibItem)`` on success. If the bibkey, author, editor, guest editor,
        person, date, pages, or journal parser returns an ``Err``, that ``Err``
        is returned unchanged. Any exception raised while parsing (for example a
        non-integer ``edition`` or an invalid bibkey in ``_further_refs``) is
        converted into an ``Err`` with ``error_type="BibItemParsingError"``.
    """
    try:
        # Parse bibkey
        bibkey = None
        if data.get("bibkey"):
            bibkey_result = parse_bibkey(data["bibkey"])
            if isinstance(bibkey_result, Err):
                return bibkey_result
            bibkey = bibkey_result.out

        # Parse authors
        authors: tuple[Author, ...] = ()
        if data.get("author"):
            author_result = parse_author(data["author"], bibstring_type)
            if isinstance(author_result, Err):
                return author_result
            authors = author_result.out

        # Parse editors
        editors: tuple[Author, ...] = ()
        if data.get("editor"):
            editor_result = parse_author(data["editor"], bibstring_type)
            if isinstance(editor_result, Err):
                return editor_result
            editors = editor_result.out

        # Parse guest editors
        guesteditors: tuple[Author, ...] = ()
        if data.get("_guesteditor"):
            guesteditor_result = parse_author(data["_guesteditor"], bibstring_type)
            if isinstance(guesteditor_result, Err):
                return guesteditor_result
            guesteditors = guesteditor_result.out

        # Parse person: only the first parsed entry is kept
        person = None
        if data.get("_person"):
            person_result = parse_author(data["_person"], bibstring_type)
            if isinstance(person_result, Err):
                return person_result
            if person_result.out:
                person = person_result.out[0]

        # Parse date: a missing date is represented as year=0
        date: BibItemDateAttr | Literal["no date"] = BibItemDateAttr(year=0)
        if data.get("date"):
            date_result = parse_date(data["date"])
            if isinstance(date_result, Err):
                return date_result
            date = date_result.out

        # Parse pages
        pages: tuple[PageAttr, ...] = ()
        if data.get("pages"):
            pages_result = parse_pages(data["pages"])
            if isinstance(pages_result, Err):
                return pages_result
            pages = pages_result.out

        # Parse journal
        journal = None
        if data.get("journal"):
            journal_result = parse_journal(data["journal"], bibstring_type)
            if isinstance(journal_result, Err):
                return journal_result
            journal = journal_result.out

        # Parse crossref - for now, we'll skip complex crossref parsing and set to empty string
        # TODO: Implement proper crossref parsing if needed

        # Parse further_refs and depends_on (parse_bibkey_list raises on a bad key;
        # the outer handler turns that into an Err)
        further_refs = parse_bibkey_list(data.get("_further_refs", ""))
        depends_on = parse_bibkey_list(data.get("_depends_on", ""))

        # Parse keywords
        keywords = parse_keywords(data.get("_kw_level1", ""), data.get("_kw_level2", ""), data.get("_kw_level3", ""))

        # Parse integer-valued fields; a malformed value raises a ValueError
        # naming the field, which the outer handler reports as an Err
        edition = _parse_optional_int(data.get("edition"), "edition")
        dltc_num = _parse_optional_int(data.get("_dltc_num"), "_dltc_num")
        num_inwork_coll = _parse_optional_int(data.get("_num_inwork_coll"), "_num_inwork_coll")
        num_coll = _parse_optional_int(data.get("_num_coll"), "_num_coll")
        num_sort = _parse_optional_int(data.get("_num_sort"), "_num_sort")

        # Parse series
        series: BaseNamedRenderable | Literal[""] = ""
        if data.get("series"):
            series_attr = _create_bibstring_attr(data["series"], bibstring_type)
            series = BaseNamedRenderable(name=series_attr, id=None)

        # Create BibItem
        bibitem = BibItem(
            to_do_general=data.get("_to_do_general", ""),
            change_request=data.get("_change_request", ""),
            entry_type=parse_entry_type(data.get("entry_type", "")),
            bibkey=bibkey or "",
            author=authors,
            editor=editors,
            options=parse_options(data.get("options", "")),
            date=date,
            pubstate=parse_pubstate(data.get("pubstate", "")),
            title=_optional_bibstring(data.get("title"), bibstring_type),
            booktitle=_optional_bibstring(data.get("booktitle"), bibstring_type),
            crossref="",
            journal=journal,
            volume=data.get("volume", ""),
            number=data.get("number", ""),
            pages=pages,
            eid=data.get("eid", ""),
            series=series,
            address=_optional_bibstring(data.get("address"), bibstring_type),
            institution=_optional_bibstring(data.get("institution"), bibstring_type),
            school=_optional_bibstring(data.get("school"), bibstring_type),
            publisher=_optional_bibstring(data.get("publisher"), bibstring_type),
            type=_optional_bibstring(data.get("type"), bibstring_type),
            edition=edition,
            note=_optional_bibstring(data.get("note"), bibstring_type),
            issuetitle=_optional_bibstring(data.get("_issuetitle"), bibstring_type),
            guesteditor=guesteditors,
            extra_note=_optional_bibstring(data.get("_extra_note"), bibstring_type),
            urn=data.get("urn", ""),
            eprint=data.get("eprint", ""),
            doi=data.get("doi", ""),
            url=data.get("url", ""),
            kws=keywords or "",
            epoch=parse_epoch(data.get("_epoch", "")),
            person=person or "",
            comm_for_profile_bib=data.get("_comm_for_profile_bib", ""),
            langid=parse_language_id(data.get("_langid", "")),
            lang_der=data.get("_lang_der", ""),
            further_refs=further_refs,
            depends_on=depends_on,
            dltc_num=dltc_num,
            spec_interest=data.get("_spec_interest", ""),
            note_perso=data.get("_note_perso", ""),
            note_stock=data.get("_note_stock", ""),
            note_status=data.get("_note_status", ""),
            num_inwork_coll=num_inwork_coll,
            num_inwork=data.get("_num_inwork", ""),
            num_coll=num_coll,
            dltc_copyediting_note=data.get("_dltc_copyediting_note", ""),
            note_missing=data.get("_note_missing", ""),
            num_sort=num_sort,
        )

        return Ok(bibitem)

    except Exception as e:
        # Boundary handler: callers receive an Err value instead of an exception.
        return Err(
            message=f"Failed to parse bibitem: {e.__class__.__name__}: {e}",
            code=-1,
            error_type="BibItemParsingError",
            error_trace=traceback.format_exc(),
        )
@@ -0,0 +1,25 @@
1
+ from aletk.utils import get_logger
2
+ from philoch_bib_sdk.logic.models import Journal, Maybe, TBibString
3
+
4
+ lgr = get_logger(__name__)
5
+
6
+
7
+ def format_journal(journal: Maybe[Journal], bibstring_type: TBibString) -> str:
8
+ """
9
+ Format a journal object into a string representation.
10
+ """
11
+
12
+ match journal:
13
+
14
+ case None:
15
+ return ""
16
+
17
+ case Journal(name, id):
18
+
19
+ if not name:
20
+ return ""
21
+
22
+ return f"{getattr(name, bibstring_type)}"
23
+
24
+ case _:
25
+ raise TypeError(f"Invalid type for journal: {type(journal)}. Dump: {journal!r}")
@@ -0,0 +1,36 @@
1
+ import traceback
2
+ from aletk.ResultMonad import Ok, Err
3
+ from aletk.utils import get_logger, remove_extra_whitespace
4
+ from philoch_bib_sdk.logic.models import Journal, BibStringAttr, TBibString
5
+
6
+ lgr = get_logger(__name__)
7
+
8
+
9
def parse_journal(text: str, bibstring_type: TBibString) -> Ok[Journal | None] | Err:
    """
    Build a ``Journal`` from a raw journal-name string.

    An empty string yields ``Ok(None)``. Otherwise the text is cleaned with
    ``remove_extra_whitespace`` and stored in the ``BibStringAttr`` field named
    by ``bibstring_type``; both ISSN fields are set to ``""``. Any exception is
    returned as an ``Err`` whose ``error_type`` is the exception class name.
    """
    try:
        if text == "":
            lgr.debug("Empty journal string, returning None.")
            return Ok(None)

        # Collapse redundant whitespace before storing the name
        cleaned_name = remove_extra_whitespace(text)
        name_attr = BibStringAttr(**{str(bibstring_type): cleaned_name})

        return Ok(Journal(name=name_attr, issn_electronic="", issn_print=""))

    except Exception as e:
        return Err(
            message=f"Error parsing journal string '{text}': {e}",
            code=-1,
            error_type=f"{e.__class__.__name__}",
            error_trace=traceback.format_exc(),
        )
@@ -0,0 +1,25 @@
1
+ from philoch_bib_sdk.logic.models import BaseRenderable, BaseNamedRenderable, TBibString
2
+
3
+
4
+ def format_renderable(
5
+ renderable: BaseRenderable | BaseNamedRenderable,
6
+ bibstring_type: TBibString,
7
+ ) -> str:
8
+ """
9
+ Format a base renderable object into a string representation.
10
+ """
11
+
12
+ match renderable:
13
+
14
+ case BaseRenderable(text, id):
15
+ if not text:
16
+ return ""
17
+ return f"{getattr(text, bibstring_type)}"
18
+
19
+ case BaseNamedRenderable(name, id):
20
+ if not name:
21
+ return ""
22
+ return f"{getattr(name, bibstring_type)}"
23
+
24
+ case _:
25
+ raise TypeError("Invalid type for renderable")
@@ -0,0 +1,3 @@
1
+ """CLI interfaces for bibliography tools."""
2
+
3
+ __all__: list[str] = []
@@ -0,0 +1,135 @@
1
+ """CLI interface for fuzzy matching bibliographic items.
2
+
3
+ This module provides a command-line interface for matching new bibliographic entries
4
+ against an existing bibliography using fuzzy matching.
5
+
6
+ Usage:
7
+ poetry run python -m philoch_bib_sdk.interfaces.cli.fuzzy_matching \\
8
+ --bibliography path/to/bibliography.csv \\
9
+ --input path/to/new_items.csv \\
10
+ --output path/to/report \\
11
+ [--format csv] \\
12
+ [--top-n 5] \\
13
+ [--min-score 100.0]
14
+ """
15
+
16
+ import argparse
17
+ import sys
18
+ from functools import partial
19
+
20
+ from aletk.ResultMonad import Err, main_try_except_wrapper
21
+ from aletk.utils import get_logger
22
+
23
+ from philoch_bib_sdk.adapters.io import load_bibliography, load_staged, write_report
24
+ from philoch_bib_sdk.procedures.fuzzy_matching import fuzzy_match_procedure
25
+
26
+ logger = get_logger(__name__)
27
+
28
+
29
@main_try_except_wrapper(logger)
def cli() -> None:
    """Parse command-line arguments and run the fuzzy matching procedure.

    Validates ``--top-n`` and ``--min-score``, runs ``fuzzy_match_procedure``
    with the loaders from ``philoch_bib_sdk.adapters.io``, and prints the
    location of the written report.

    Returns:
        None on success (raises exception on failure)
    """
    # NOTE(review): main() checks this function's result for Err, so the
    # decorator presumably converts raised exceptions into an Err - confirm.
    usage_examples = """
Examples:
  # Basic usage with CSV files
  %(prog)s --bibliography refs.csv --input new_refs.csv --output matches

  # With custom matching parameters
  %(prog)s --bibliography refs.csv --input new_refs.csv --output matches \\
      --top-n 10 --min-score 200.0

  # Specify output format explicitly
  %(prog)s --bibliography refs.csv --input new_refs.csv --output matches \\
      --format csv
        """

    parser = argparse.ArgumentParser(
        description="Fuzzy match bibliographic items against an existing bibliography.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    parser.add_argument(
        "--bibliography", required=True, help="Path to bibliography file (format auto-detected from extension)"
    )
    parser.add_argument("--input", required=True, help="Path to input file with new items to match")
    parser.add_argument("--output", required=True, help="Path to output report file (without extension)")
    parser.add_argument("--format", default="csv", choices=["csv"], help="Output format (default: csv)")
    parser.add_argument(
        "--top-n", type=int, default=5, help="Number of top matches to return per item (default: 5)"
    )
    parser.add_argument(
        "--min-score", type=float, default=0.0, help="Minimum score threshold for matches (default: 0.0)"
    )

    options = parser.parse_args()

    # Reject out-of-range numeric parameters before doing any work
    if options.top_n < 1:
        raise ValueError("--top-n must be at least 1")
    if options.min_score < 0:
        raise ValueError("--min-score must be non-negative")

    logger.info("Starting fuzzy matching CLI")
    outcome = fuzzy_match_procedure(
        bibliography_path=options.bibliography,
        staged_path=options.input,
        output_path=options.output,
        load_bibliography=load_bibliography,
        load_staged=load_staged,
        # Bind the chosen output format so the procedure only supplies the rest
        write_report=partial(write_report, output_format=options.format),
        top_n=options.top_n,
        min_score=options.min_score,
    )

    # A failed procedure is surfaced as an exception
    if isinstance(outcome, Err):
        raise RuntimeError(outcome.message)

    print(f"Success! Report written to {options.output}.{options.format}")
    logger.info("Fuzzy matching completed successfully")
124
+
125
+
126
def main() -> None:
    """Run ``cli`` and exit the process with a matching status code.

    Exits with 0 unless ``cli`` returned an ``Err``; in that case exits with
    the error's code when it is positive, and with 1 otherwise.
    """
    outcome = cli()
    if not isinstance(outcome, Err):
        sys.exit(0)
    sys.exit(outcome.code if outcome.code > 0 else 1)
132
+
133
+
134
+ if __name__ == "__main__":
135
+ main()
@@ -0,0 +1,39 @@
1
+ """Logic layer for bibliography SDK."""
2
+
3
+ from philoch_bib_sdk.logic.literals import TBibTeXEntryType
4
+ from philoch_bib_sdk.logic.models import (
5
+ Author,
6
+ BibItem,
7
+ BibItemDateAttr,
8
+ BibKeyAttr,
9
+ BibStringAttr,
10
+ Journal,
11
+ Maybe,
12
+ PageAttr,
13
+ TBibString,
14
+ )
15
+ from philoch_bib_sdk.logic.models_staging import (
16
+ BibItemStaged,
17
+ Match,
18
+ PartialScore,
19
+ ScoreComponent,
20
+ )
21
+
22
+ __all__ = [
23
+ # Core models
24
+ "Author",
25
+ "BibItem",
26
+ "BibItemDateAttr",
27
+ "BibKeyAttr",
28
+ "BibStringAttr",
29
+ "Journal",
30
+ "Maybe",
31
+ "PageAttr",
32
+ "TBibString",
33
+ "TBibTeXEntryType",
34
+ # Staging models
35
+ "BibItemStaged",
36
+ "Match",
37
+ "PartialScore",
38
+ "ScoreComponent",
39
+ ]