philoch-bib-sdk 0.3.9__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. philoch_bib_sdk/__init__.py +0 -0
  2. philoch_bib_sdk/_rust.cp313-win_amd64.pyd +0 -0
  3. philoch_bib_sdk/adapters/io/__init__.py +115 -0
  4. philoch_bib_sdk/adapters/io/csv/__init__.py +308 -0
  5. philoch_bib_sdk/adapters/io/ods/__init__.py +145 -0
  6. philoch_bib_sdk/adapters/plaintext/bibitem_reader.py +0 -0
  7. philoch_bib_sdk/adapters/tabular_data/read_journal_volume_number_index.py +58 -0
  8. philoch_bib_sdk/converters/latex.py +6 -0
  9. philoch_bib_sdk/converters/plaintext/author/formatter.py +34 -0
  10. philoch_bib_sdk/converters/plaintext/author/parser.py +83 -0
  11. philoch_bib_sdk/converters/plaintext/bib_string_formatter.py +8 -0
  12. philoch_bib_sdk/converters/plaintext/bibitem/bibkey_formatter.py +21 -0
  13. philoch_bib_sdk/converters/plaintext/bibitem/bibkey_parser.py +158 -0
  14. philoch_bib_sdk/converters/plaintext/bibitem/date_formatter.py +37 -0
  15. philoch_bib_sdk/converters/plaintext/bibitem/date_parser.py +62 -0
  16. philoch_bib_sdk/converters/plaintext/bibitem/formatter.py +182 -0
  17. philoch_bib_sdk/converters/plaintext/bibitem/pages_formatter.py +13 -0
  18. philoch_bib_sdk/converters/plaintext/bibitem/pages_parser.py +63 -0
  19. philoch_bib_sdk/converters/plaintext/bibitem/parser.py +415 -0
  20. philoch_bib_sdk/converters/plaintext/journal/formatter.py +25 -0
  21. philoch_bib_sdk/converters/plaintext/journal/parser.py +36 -0
  22. philoch_bib_sdk/converters/plaintext/shared/renderable_formatter.py +25 -0
  23. philoch_bib_sdk/interfaces/cli/__init__.py +3 -0
  24. philoch_bib_sdk/interfaces/cli/fuzzy_matching.py +135 -0
  25. philoch_bib_sdk/logic/__init__.py +39 -0
  26. philoch_bib_sdk/logic/default_models.py +315 -0
  27. philoch_bib_sdk/logic/functions/__init__.py +31 -0
  28. philoch_bib_sdk/logic/functions/comparator.py +414 -0
  29. philoch_bib_sdk/logic/functions/fuzzy_matcher.py +796 -0
  30. philoch_bib_sdk/logic/functions/journal_article_matcher.py +44 -0
  31. philoch_bib_sdk/logic/literals.py +98 -0
  32. philoch_bib_sdk/logic/models.py +366 -0
  33. philoch_bib_sdk/logic/models_staging.py +173 -0
  34. philoch_bib_sdk/procedures/fuzzy_matching.py +112 -0
  35. philoch_bib_sdk/py.typed +0 -0
  36. philoch_bib_sdk/rust_scorer/Cargo.lock +232 -0
  37. philoch_bib_sdk/rust_scorer/Cargo.toml +26 -0
  38. philoch_bib_sdk/rust_scorer/pyproject.toml +15 -0
  39. philoch_bib_sdk/rust_scorer/rust_scorer.pyi +65 -0
  40. philoch_bib_sdk/rust_scorer/src/lib.rs +362 -0
  41. philoch_bib_sdk-0.3.9.dist-info/METADATA +15 -0
  42. philoch_bib_sdk-0.3.9.dist-info/RECORD +44 -0
  43. philoch_bib_sdk-0.3.9.dist-info/WHEEL +4 -0
  44. philoch_bib_sdk-0.3.9.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,44 @@
1
+ from typing import Callable, Dict, Tuple
2
+ from philoch_bib_sdk.converters.plaintext.journal.formatter import format_journal
3
+ from philoch_bib_sdk.logic.models import BibItem, BibKeyAttr
4
+
5
+
6
+ type TJournalName = str
7
+
8
+ type TVolume = str
9
+
10
+ type TNumber = str
11
+
12
+ type TBibkey = str
13
+
14
+
15
+ type TJournalBibkeyIndex = Dict[
16
+ Tuple[TJournalName, TVolume, TNumber], BibKeyAttr
17
+ ] # (journal, volume, number) # bibkey
18
+
19
+
20
def get_bibkey_by_journal_volume_number(index: TJournalBibkeyIndex, subject: BibItem) -> BibKeyAttr:
    """
    Return the bibkey that `index` stores for the (journal, volume, number) triple of `subject`.

    The journal part of the key is `format_journal(subject.journal, bibstring_type="latex")`;
    the volume and number are read from `subject` as they are.

    Raises:
        ValueError: if the formatted journal, the volume, or the number is the empty string.
        KeyError: presumably, when the triple is not in `index` (the lookup is a plain subscript).
    """

    # TODO: need to ensure the index is unique, possibly via some fuzzy matching with the title or the author

    key = (
        format_journal(subject.journal, bibstring_type="latex"),
        subject.volume,
        subject.number,
    )
    journal, volume, number = key

    # All three parts are required; an empty string marks a missing part.
    if "" in key:
        raise ValueError(
            f"Expected subject bibitem journal with non-empty journal, volume, and number. Found [[ journal: {journal}; volume: {volume}; number: {number} ]] instead."
        )

    return index[key]
37
+
38
+
39
+ type TReadIndex = Callable[
40
+ [
41
+ str, # path to the index file
42
+ ],
43
+ TJournalBibkeyIndex,
44
+ ]
@@ -0,0 +1,98 @@
1
+ from typing import Literal, Tuple, get_args
2
+
3
# Entry types accepted for a bibliographic item; "UNKNOWN" is part of the set.
TBibTeXEntryType = Literal[
    "article",
    "book",
    "incollection",
    "inproceedings",
    "mastersthesis",
    "misc",
    "phdthesis",
    "proceedings",
    "techreport",
    "unpublished",
    "UNKNOWN",
]

# Reduced set of publication states; every value here also appears in TPubState.
TBasicPubState = Literal[
    "",
    "unpub",
    "forthcoming",
]

# Full set of publication states.
TPubState = Literal[
    "",
    "unpub",
    "forthcoming",
    "inwork",
    "submitted",
    "published",
]

# Language identifiers; the empty string and "unknown" are both valid members.
TLanguageID = Literal[
    "",
    "catalan",
    "czech",
    "danish",
    "dutch",
    "english",
    "french",
    "greek",
    "italian",
    "latin",
    "lithuanian",
    "ngerman",
    "polish",
    "portuguese",
    "romanian",
    "russian",
    "slovak",
    "spanish",
    "swedish",
    "unknown",
]

# Epoch labels. Both "neokantianism" and "neo-kantianism" are accepted spellings.
TEpoch = Literal[
    "",
    "ancient-philosophy",
    "ancient-scientists",
    "austrian-philosophy",
    "british-idealism",
    "classics",
    "contemporaries",
    "contemporary-scientists",
    "continental-philosophy",
    "critical-theory",
    "cynics",
    "enlightenment",
    "existentialism",
    "exotic-philosophy",
    "german-idealism",
    "german-rationalism",
    "gestalt-psychology",
    "hermeneutics",
    "islamic-philosophy",
    "mathematicians",
    "medieval-philosophy",
    "modern-philosophy",
    "modern-scientists",
    "neokantianism",
    "neo-kantianism",
    "neoplatonism",
    "new-realism",
    "ordinary-language-philosophy",
    "phenomenology",
    "polish-logic",
    "pragmatism",
    "presocratics",
    "renaissance",
    "stoics",
    "theologians",
    "vienna-circle",
]

# Literal value constants for runtime validation: each tuple holds the members of the
# matching Literal alias, in declaration order. TBasicPubState has no tuple of its own.
BIBTEX_ENTRY_TYPE_VALUES: Tuple[TBibTeXEntryType, ...] = get_args(TBibTeXEntryType)
PUB_STATE_VALUES: Tuple[TPubState, ...] = get_args(TPubState)
LANGUAGE_ID_VALUES: Tuple[TLanguageID, ...] = get_args(TLanguageID)
EPOCH_VALUES: Tuple[TEpoch, ...] = get_args(TEpoch)
@@ -0,0 +1,366 @@
1
+ from __future__ import annotations
2
+ from typing import Literal, Tuple, get_args
3
+ import attrs
4
+
5
+ from philoch_bib_sdk.logic.literals import TBasicPubState, TBibTeXEntryType, TEpoch, TLanguageID, TPubState
6
+
7
+
8
+ type Maybe[T] = T | None
9
+ type MaybeStr[T] = T | Literal[""]
10
+
11
+
12
@attrs.define(frozen=True, slots=True)
class BibStringAttr:
    """
    One string held in the three renderings needed for different purposes.

    Args:
        latex: LaTeX-formatted form, usable in bib files
        unicode: Unicode form, usable in running text. Produced from the LaTeX form
        simplified: simplified form, usable for matching strings. Produced from the Unicode form

    Each rendering defaults to the empty string.
    """

    latex: str = ""
    unicode: str = ""
    simplified: str = ""
26
+
27
+
28
+ BibStringLiteral = Literal["latex", "unicode", "simplified"]
29
+
30
+ type TBibString = BibStringLiteral
31
+ BIB_STRING_VALUES: Tuple[str, ...] = get_args(BibStringLiteral)
32
+
33
+
34
+ ############
35
+ # Base Renderables
36
+ ############
37
+
38
+
39
@attrs.define(frozen=True, slots=True)
class BaseRenderable:
    """
    Base for renderable objects whose content lives in a single 'text' attribute.

    Args:
        text: BibStringAttr
        id: Maybe[int] = None
    """

    text: BibStringAttr
    id: Maybe[int] = None
51
+
52
+
53
@attrs.define(frozen=True, slots=True)
class BaseNamedRenderable:
    """
    Base for renderable objects whose content lives in a single 'name' attribute.

    Args:
        name: BibStringAttr
        id: Maybe[int] = None
    """

    name: BibStringAttr
    id: Maybe[int] = None
65
+
66
+
67
+ RenderablesLiteral = Literal["text", "name"]
68
+
69
+ type TRenderable = RenderablesLiteral
70
+ RENDERABLES_VALUES: Tuple[str, ...] = get_args(RenderablesLiteral)
71
+
72
+
73
+ ############
74
+ # Author
75
+ ############
76
+
77
+
78
@attrs.define(frozen=True, slots=True)
class Author:
    """
    An author of a publication.

    Every field except `id` is required by the constructor.

    Args:
        given_name: BibStringAttr
        family_name: BibStringAttr
        mononym: BibStringAttr
        shorthand: BibStringAttr
        famous_name: BibStringAttr
        publications: Tuple[BibItem, ...]
        id: Maybe[int] = None
    """

    given_name: BibStringAttr
    family_name: BibStringAttr
    mononym: BibStringAttr
    shorthand: BibStringAttr
    famous_name: BibStringAttr
    publications: Tuple[BibItem, ...]
    id: Maybe[int] = None
99
+
100
+
101
+ ############
102
+ # Journal
103
+ ############
104
+
105
+
106
@attrs.define(frozen=True, slots=True)
class Journal:
    """
    A journal in which publications appear.

    Args:
        name: BibStringAttr
        issn_print: str
        issn_electronic: str
        id: Maybe[int] = None
    """

    name: BibStringAttr
    issn_print: str
    issn_electronic: str
    id: Maybe[int] = None
123
+
124
+
125
+ ############
126
+ # Keyword
127
+ ############
128
+
129
+
130
@attrs.define(frozen=True, slots=True)
class Keyword:
    """
    A single keyword attached to a publication.

    Args:
        name: str
        id: Maybe[int] = None
    """

    name: str
    id: Maybe[int] = None
142
+
143
+
144
+ ############
145
+ # BibItem
146
+ ############
147
+
148
+
149
class BibKeyValidationError(Exception):
    """Raised when a BibKeyAttr is created with an empty 'first_author' or 'date'."""
151
+
152
+
153
@attrs.define(frozen=True, slots=True)
class BibKeyAttr:
    """
    A unique identifier for a publication.

    Args:
        first_author: str
        other_authors: str
        date: int | TBasicPubState
        date_suffix: str

    Raises:
        BibKeyValidationError: if 'first_author' or 'date' is falsy.
    """

    first_author: str
    other_authors: str
    date: int | TBasicPubState
    date_suffix: str

    def __attrs_post_init__(self) -> None:
        # Truthiness test: an empty string counts as missing, and so does a date of 0.
        if not (self.first_author and self.date):
            raise BibKeyValidationError("Both 'first_author' and 'date' must not be empty.")
173
+
174
+
175
class BibItemDateValidationError(Exception):
    """Raised when a BibItemDateAttr is created with an inconsistent combination of fields."""
177
+
178
+
179
@attrs.define(frozen=True, slots=True)
class BibItemDateAttr:
    """
    Date of a publication.

    Example:
        BibItemDateAttr(year=2021, year_part_2_slash=2022) represents `2021/2022`.
        BibItemDateAttr(year=2021, month=1, day=1) represents `2021-01-01`.

    Args:
        year: int
        year_part_2_hyphen: Maybe[int] = None
        year_part_2_slash: Maybe[int] = None
        month: Maybe[int] = None
        day: Maybe[int] = None

    Raises:
        BibItemDateValidationError: if a second year part or a month is given without a
            year, if only one of 'month' and 'day' is given, or if both second year parts
            are given at once.
    """

    year: int
    year_part_2_hyphen: Maybe[int] = None
    year_part_2_slash: Maybe[int] = None
    month: Maybe[int] = None
    day: Maybe[int] = None

    def __attrs_post_init__(self) -> None:
        # All checks below rely on truthiness, so None and 0 both count as "not set".
        if any([self.year_part_2_hyphen, self.year_part_2_slash]) and not self.year:
            raise BibItemDateValidationError(
                "If 'year_part_2_hyphen' or 'year_part_2_slash' is set, 'year' must not be empty."
            )

        # 'month' and 'day' must be given together or not at all.
        # NOTE(review): VALID_DATE_FORMATS lists "{year}-{month}", yet a month without a day
        # is rejected here -- confirm which of the two is intended.
        if bool(self.month) != bool(self.day):
            raise BibItemDateValidationError("If 'day' is set, 'month' must be set too, and vice versa.")

        if self.month and not self.year:
            raise BibItemDateValidationError("If 'month' is set, 'year' must not be empty.")

        # The hyphen and slash forms of the second year part are mutually exclusive.
        if self.year_part_2_hyphen and self.year_part_2_slash:
            raise BibItemDateValidationError("If 'year_part_2_hyphen' is set, 'year_part_2_slash' must not be set.")
216
+
217
+
218
# Templates of the accepted date notations.
# NOTE(review): the hyphenated range uses the placeholder {year_1} while every other
# template uses {year} -- confirm against the code that consumes these templates.
VALID_DATE_FORMATS = [
    "{year}",
    "{year_1}-{year_2}",
    "{year}/{year_2}",
    "{year}-{month}-{day}",
    "{year}-{month}",
]
225
+
226
+
227
@attrs.define(frozen=True, slots=True)
class KeywordsAttr:
    """
    The keywords of a publication, one per level. All three levels are required.

    Args:
        level_1: Keyword
        level_2: Keyword
        level_3: Keyword
    """

    level_1: Keyword
    level_2: Keyword
    level_3: Keyword
241
+
242
+
243
class PageValidationError(Exception):
    """Raised when a PageAttr is created with an 'end' but no 'start'."""
245
+
246
+
247
@attrs.define(frozen=True, slots=True)
class PageAttr:
    """
    Page numbers of a publication. Can be a range, roman numerals, or a single page.

    Args:
        start: str
        end: str

    Raises:
        PageValidationError: if 'end' is non-empty while 'start' is empty.
    """

    start: str
    end: str

    def __attrs_post_init__(self) -> None:
        # An end page only makes sense together with a start page.
        if not self.start and self.end:
            raise PageValidationError("If 'end' is set, 'start' must not be empty.")
263
+
264
+
265
class BibItemValidationError(Exception):
    """Raised when a bibliographic item and its crossref carry the same bibkey."""
267
+
268
+
269
@attrs.define(frozen=True, slots=True)
class BibItem:
    """
    Bibliographic item.

    Only `id` and `_bib_info_source` have defaults; every other field must be passed to the
    constructor. Fields typed `MaybeStr[...]` accept the empty string and fields typed
    `Maybe[...]` accept None.

    NOTE(review): attrs by default exposes underscore-prefixed attributes to `__init__`
    without the leading underscore -- confirm against callers.

    Raises:
        BibItemValidationError: if a crossref is set and its bibkey equals this item's bibkey.
    """

    # Normal string fields
    _to_do_general: str
    _change_request: str

    # Official fields, may be stored in different formats
    entry_type: TBibTeXEntryType
    bibkey: MaybeStr[BibKeyAttr]
    author: Tuple[Author, ...]
    editor: Tuple[Author, ...]
    options: Tuple[str, ...]
    # shorthand: BibStringAttr  # Mononym of the author
    date: BibItemDateAttr | Literal["no date"]
    pubstate: TPubState
    title: MaybeStr[BibStringAttr]
    booktitle: MaybeStr[BibStringAttr]
    crossref: MaybeStr[CrossrefBibItemAttr]
    journal: Maybe[Journal]
    volume: str
    number: str
    pages: Tuple[PageAttr, ...]
    eid: str
    series: MaybeStr[BaseNamedRenderable]
    address: MaybeStr[BibStringAttr]
    institution: MaybeStr[BibStringAttr]
    school: MaybeStr[BibStringAttr]
    publisher: MaybeStr[BibStringAttr]
    type: MaybeStr[BibStringAttr]
    edition: Maybe[int]
    note: MaybeStr[BibStringAttr]
    issuetitle: MaybeStr[BibStringAttr]
    _guesteditor: Tuple[Author, ...]  # Custom field
    _extra_note: MaybeStr[BibStringAttr]  # Custom field
    urn: str
    eprint: str
    doi: str
    url: str

    # String fields
    _kws: MaybeStr[KeywordsAttr]
    _epoch: TEpoch
    _person: MaybeStr[Author]
    _comm_for_profile_bib: str
    _langid: TLanguageID
    _lang_der: str
    _further_refs: Tuple[BibKeyAttr, ...]
    _depends_on: Tuple[BibKeyAttr, ...]
    _dltc_num: Maybe[int]
    _spec_interest: str
    _note_perso: str
    _note_stock: str
    _note_status: str
    _num_inwork_coll: Maybe[int]
    _num_inwork: str
    _num_coll: Maybe[int]
    _dltc_copyediting_note: str
    _note_missing: str
    _num_sort: Maybe[int]

    # Additional fields
    id: Maybe[int] = None
    _bib_info_source: str = ""

    def __attrs_post_init__(self) -> None:
        # An item must not cross-reference an entry that carries its own bibkey.
        crossref = self.crossref
        if crossref and self.bibkey == crossref.bibkey:
            raise BibItemValidationError("Crossref bibkey must be different from the main bibkey.")
344
+
345
+
346
@attrs.define(frozen=True, slots=True)
class CrossrefBibItemAttr(BibItem):
    """
    A cross-reference to another bibliographic item.

    Takes the same fields as BibItem and adds stricter validation: the entry type must be
    "book", and both 'booktitle' and 'bibkey' must be non-empty. The post-init hook
    replaces the one of BibItem and repeats its crossref check.

    Raises:
        ValueError: if 'entry_type' is not "book", or 'booktitle' or 'bibkey' is empty.
        BibItemValidationError: if a nested crossref carries the same bibkey as this item.
    """

    def __attrs_post_init__(self) -> None:
        # The checks run in this fixed order; the first failing one decides the error.
        if self.entry_type != "book":
            raise ValueError("Crossref must have a 'type' of 'book'.")

        if not self.booktitle:
            raise ValueError("Crossref must have a 'booktitle'.")

        if not self.bibkey:
            raise ValueError("Crossref must have a 'bibkey'.")

        nested = self.crossref
        if nested and self.bibkey == nested.bibkey:
            raise BibItemValidationError("Crossref bibkey must be different from the main bibkey.")
@@ -0,0 +1,173 @@
1
+ """Data models for staged bibliography matching.
2
+
3
+ This module provides models for tracking fuzzy matching results when comparing
4
+ new BibItems against an existing bibliography.
5
+ """
6
+
7
+ import json
8
+ from enum import StrEnum
9
+ from typing import Tuple, TypedDict
10
+
11
+ import attrs
12
+
13
+
14
class SearchMetadata(TypedDict, total=False):
    """Statistics recorded for one fuzzy matching search.

    Declared with ``total=False``, so every key may be omitted.

    Attributes:
        search_time_ms: Time taken for the search in milliseconds
        candidates_searched: Number of candidates evaluated
        scorer: Which scorer was used ("rust" or "python")
    """

    search_time_ms: int
    candidates_searched: int
    scorer: str
26
+
27
+
28
+ from philoch_bib_sdk.converters.plaintext.author.formatter import format_author
29
+ from philoch_bib_sdk.logic.models import BibItem
30
+
31
+
32
+ class ScoreComponent(StrEnum):
33
+ """Components used in calculating similarity scores between BibItems."""
34
+
35
+ TITLE = "title"
36
+ AUTHOR = "author"
37
+ DATE = "date"
38
+ DOI = "doi"
39
+ JOURNAL_VOLUME_NUMBER = "journal_volume_number"
40
+ PAGES = "pages"
41
+ PUBLISHER = "publisher"
42
+
43
+
44
@attrs.define(frozen=True, slots=True)
class PartialScore:
    """The score of a single comparison, with its weight and an explanation.

    The class only stores the values; it neither computes nor validates them.

    Attributes:
        component: The type of comparison (title, author, etc.)
        score: Raw score value (before weighting)
        weight: Weight factor applied to this component (0.0-1.0)
        weighted_score: Final score after applying weight (score * weight)
        details: Human-readable explanation of the score
    """

    component: ScoreComponent
    score: int
    weight: float
    weighted_score: float
    details: str
61
+
62
+
63
@attrs.define(frozen=True, slots=True)
class Match:
    """One candidate match together with its scoring breakdown.

    Attributes:
        bibkey: The bibliography key of the matched item
        matched_bibitem: The full BibItem that was matched
        total_score: Sum of all weighted partial scores
        partial_scores: Detailed breakdown of each score component
        rank: Position in the results (1-based, 1 = best match)
    """

    bibkey: str
    matched_bibitem: BibItem
    total_score: float
    partial_scores: Tuple[PartialScore, ...]
    rank: int

    def to_json_summary(self) -> dict[str, object]:
        """Build a JSON-serializable summary of this match.

        Title and author are taken in their "simplified" rendering and cut to 100
        characters. Scores are rounded to two decimals.

        Returns:
            Dictionary with bibkey, rank, scores, and breakdown details
        """
        from philoch_bib_sdk.logic.models import BibStringAttr

        def shorten(text: str) -> str:
            # Truncate long strings for readability in CSV
            return text[:100] + "..." if len(text) > 100 else text

        # The title may be the empty string instead of a BibStringAttr.
        title_attr = self.matched_bibitem.title
        title = title_attr.simplified if isinstance(title_attr, BibStringAttr) else ""

        # Keyed by component value, so a repeated component keeps only its last entry.
        breakdown: dict[str, dict[str, object]] = {}
        for partial in self.partial_scores:
            breakdown[partial.component.value] = {
                "score": partial.score,
                "weight": partial.weight,
                "weighted": round(partial.weighted_score, 2),
                "details": partial.details,
            }

        return {
            "bibkey": self.bibkey,
            "rank": self.rank,
            "total_score": round(self.total_score, 2),
            "title": shorten(title),
            "author": shorten(format_author(self.matched_bibitem.author, "simplified")),
            "score_breakdown": breakdown,
        }
113
+
114
+
115
@attrs.define(frozen=True, slots=True)
class BibItemStaged:
    """An incoming BibItem paired with the matches found for it in a bibliography.

    Used for processing new/incoming bibliographic entries and comparing them
    against an existing bibliography to find potential matches or duplicates.

    Attributes:
        bibitem: The new/incoming item to match
        top_matches: Top N best matches found in the bibliography
        search_metadata: Performance and search statistics
    """

    bibitem: BibItem
    top_matches: Tuple[Match, ...]
    search_metadata: SearchMetadata

    def to_csv_row(self) -> dict[str, str | int | float]:
        """Flatten this staged item into one CSV row.

        The first entry of `top_matches` is reported as the best match; with no matches
        the score is 0.0 and the bibkey is empty. Missing metadata keys are reported as 0.

        Returns:
            Dictionary suitable for CSV writing with json-encoded top_matches
        """
        from philoch_bib_sdk.converters.plaintext.bibitem.bibkey_formatter import format_bibkey
        from philoch_bib_sdk.logic.models import BibItemDateAttr, BibStringAttr

        staged = self.bibitem

        # Only a structured date yields a year; the "no date" marker gives an empty string.
        date = staged.date
        year = str(date.year) if isinstance(date, BibItemDateAttr) else ""

        # The title may be the empty string instead of a BibStringAttr.
        title_attr = staged.title
        title = title_attr.simplified if isinstance(title_attr, BibStringAttr) else ""

        if self.top_matches:
            best = self.top_matches[0]
            best_score, best_bibkey = best.total_score, best.bibkey
        else:
            best_score, best_bibkey = 0.0, ""

        summaries = tuple(match.to_json_summary() for match in self.top_matches)

        return {
            "staged_bibkey": format_bibkey(staged.bibkey),
            "staged_title": title,
            "staged_author": format_author(staged.author, "simplified"),
            "staged_year": year,
            "num_matches": len(self.top_matches),
            "best_match_score": round(best_score, 2),
            "best_match_bibkey": best_bibkey,
            "top_matches_json": json.dumps(summaries),
            "search_time_ms": self.search_metadata.get("search_time_ms", 0),
            "candidates_searched": self.search_metadata.get("candidates_searched", 0),
        }