philoch-bib-sdk 0.3.9__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. philoch_bib_sdk/__init__.py +0 -0
  2. philoch_bib_sdk/_rust.cp313-win_amd64.pyd +0 -0
  3. philoch_bib_sdk/adapters/io/__init__.py +115 -0
  4. philoch_bib_sdk/adapters/io/csv/__init__.py +308 -0
  5. philoch_bib_sdk/adapters/io/ods/__init__.py +145 -0
  6. philoch_bib_sdk/adapters/plaintext/bibitem_reader.py +0 -0
  7. philoch_bib_sdk/adapters/tabular_data/read_journal_volume_number_index.py +58 -0
  8. philoch_bib_sdk/converters/latex.py +6 -0
  9. philoch_bib_sdk/converters/plaintext/author/formatter.py +34 -0
  10. philoch_bib_sdk/converters/plaintext/author/parser.py +83 -0
  11. philoch_bib_sdk/converters/plaintext/bib_string_formatter.py +8 -0
  12. philoch_bib_sdk/converters/plaintext/bibitem/bibkey_formatter.py +21 -0
  13. philoch_bib_sdk/converters/plaintext/bibitem/bibkey_parser.py +158 -0
  14. philoch_bib_sdk/converters/plaintext/bibitem/date_formatter.py +37 -0
  15. philoch_bib_sdk/converters/plaintext/bibitem/date_parser.py +62 -0
  16. philoch_bib_sdk/converters/plaintext/bibitem/formatter.py +182 -0
  17. philoch_bib_sdk/converters/plaintext/bibitem/pages_formatter.py +13 -0
  18. philoch_bib_sdk/converters/plaintext/bibitem/pages_parser.py +63 -0
  19. philoch_bib_sdk/converters/plaintext/bibitem/parser.py +415 -0
  20. philoch_bib_sdk/converters/plaintext/journal/formatter.py +25 -0
  21. philoch_bib_sdk/converters/plaintext/journal/parser.py +36 -0
  22. philoch_bib_sdk/converters/plaintext/shared/renderable_formatter.py +25 -0
  23. philoch_bib_sdk/interfaces/cli/__init__.py +3 -0
  24. philoch_bib_sdk/interfaces/cli/fuzzy_matching.py +135 -0
  25. philoch_bib_sdk/logic/__init__.py +39 -0
  26. philoch_bib_sdk/logic/default_models.py +315 -0
  27. philoch_bib_sdk/logic/functions/__init__.py +31 -0
  28. philoch_bib_sdk/logic/functions/comparator.py +414 -0
  29. philoch_bib_sdk/logic/functions/fuzzy_matcher.py +796 -0
  30. philoch_bib_sdk/logic/functions/journal_article_matcher.py +44 -0
  31. philoch_bib_sdk/logic/literals.py +98 -0
  32. philoch_bib_sdk/logic/models.py +366 -0
  33. philoch_bib_sdk/logic/models_staging.py +173 -0
  34. philoch_bib_sdk/procedures/fuzzy_matching.py +112 -0
  35. philoch_bib_sdk/py.typed +0 -0
  36. philoch_bib_sdk/rust_scorer/Cargo.lock +232 -0
  37. philoch_bib_sdk/rust_scorer/Cargo.toml +26 -0
  38. philoch_bib_sdk/rust_scorer/pyproject.toml +15 -0
  39. philoch_bib_sdk/rust_scorer/rust_scorer.pyi +65 -0
  40. philoch_bib_sdk/rust_scorer/src/lib.rs +362 -0
  41. philoch_bib_sdk-0.3.9.dist-info/METADATA +15 -0
  42. philoch_bib_sdk-0.3.9.dist-info/RECORD +44 -0
  43. philoch_bib_sdk-0.3.9.dist-info/WHEEL +4 -0
  44. philoch_bib_sdk-0.3.9.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,83 @@
1
+ import traceback
2
+ from typing import Tuple
3
+ from aletk.ResultMonad import Ok, Err
4
+ from aletk.utils import get_logger, remove_extra_whitespace
5
+ from philoch_bib_sdk.logic.models import Author, BibStringAttr, TBibString
6
+
7
+ lgr = get_logger(__name__)
8
+
9
+
10
+ def _parse_normalize(text: str) -> Tuple[str, str, str]:
11
+ """
12
+ Return a tuple of (given_name, family_name, mononym).
13
+
14
+ Handles formats:
15
+ - "Mononym" -> ("", "", "Mononym")
16
+ - "Family, Given" -> ("Given", "Family", "")
17
+ - "Family, Suffix, Given" -> ("Given", "Family Suffix", "")
18
+
19
+ Suffixes like Jr., Sr., III, etc. are combined with the family name.
20
+ """
21
+ parts = tuple(remove_extra_whitespace(part) for part in text.split(","))
22
+
23
+ if len(parts) == 0:
24
+ return ("", "", "")
25
+
26
+ elif len(parts) == 1:
27
+ # Mononym
28
+ return ("", "", parts[0])
29
+
30
+ elif len(parts) == 2:
31
+ # Full name: "Family, Given"
32
+ return (parts[1], parts[0], "")
33
+
34
+ elif len(parts) == 3:
35
+ # Full name with suffix: "Family, Suffix, Given"
36
+ # Combine family name and suffix (e.g., "Belnap Jr.")
37
+ family_with_suffix = f"{parts[0]} {parts[1]}"
38
+ return (parts[2], family_with_suffix, "")
39
+
40
+ else:
41
+ raise ValueError(f"Unexpected number of author parts found in '{text}': '{parts}'. Expected 3 or less.")
42
+
43
+
44
def _parse_single(normalized_name_parts: Tuple[str, str, str], bib_string_type: TBibString) -> Author:
    """
    Build an Author from a normalized ``(given_name, family_name, mononym)`` triple.

    Each name part is stored in a BibStringAttr under the field named by
    ``bib_string_type``. ``shorthand`` and ``famous_name`` are left as default
    BibStringAttr instances and ``publications`` is left empty.
    """
    field_name = str(bib_string_type)

    given_name, family_name, mononym = (
        BibStringAttr(**{field_name: name_part}) for name_part in normalized_name_parts
    )

    return Author(
        given_name=given_name,
        family_name=family_name,
        mononym=mononym,
        shorthand=BibStringAttr(),
        famous_name=BibStringAttr(),
        publications=(),
    )
58
+
59
+
60
def parse_author(text: str, bibstring_type: TBibString) -> Ok[Tuple[Author, ...]] | Err:
    """
    Return either a tuple of Author objects or an error.

    The input string is expected to be an ' and '-separated list of authors,
    with each author in the format "family_name, given_name",
    "family_name, suffix, given_name" or "mononym".

    Args:
        text: The raw author field. An empty string yields an empty tuple.
        bibstring_type: Name of the BibStringAttr field the name parts are stored in.

    Returns:
        ``Ok`` wrapping the parsed authors, or ``Err`` (error_type "ParsingError")
        carrying the exception description and traceback if anything fails.
    """
    import re  # local import: keeps this block self-contained

    try:
        if text == "":
            lgr.debug("Empty author string, returning empty tuple.")
            return Ok(())

        # Split only on "and" standing as its own word. A plain substring split
        # would cut names that merely contain the letters, e.g. "Alexander",
        # "Brandom" or "Sandra".
        raw_authors = re.split(r"\s+and\s+", text)

        authors = tuple(
            _parse_single(_parse_normalize(remove_extra_whitespace(raw_author)), bibstring_type)
            for raw_author in raw_authors
        )

        return Ok(authors)

    except Exception as e:
        return Err(
            message=f"Could not parse 'author' field with value [[ {text} ]]. {e.__class__.__name__}: {e}",
            code=-1,
            error_type="ParsingError",
            error_trace=f"{traceback.format_exc()}",
        )
@@ -0,0 +1,8 @@
1
+ from philoch_bib_sdk.logic.models import BibStringAttr, MaybeStr, TBibString
2
+
3
+
4
def format_bib_string_attr(bib_string: MaybeStr[BibStringAttr], bibstring_type: TBibString) -> str:
    """
    Return the ``bibstring_type`` variant of a BibStringAttr as a plain string.

    A falsy ``bib_string`` (for instance the empty string) yields "". If the
    object has no attribute named ``bibstring_type``, "" is returned as well.
    """
    if not bib_string:
        return ""

    return getattr(bib_string, bibstring_type, "")
@@ -0,0 +1,21 @@
1
+ from philoch_bib_sdk.logic.models import BibKeyAttr, MaybeStr
2
+
3
+
4
def format_bibkey(bibkey: MaybeStr[BibKeyAttr]) -> str:
    """
    Render a bibkey as ``<authors>:<year>``.

    - An empty-string bibkey renders as "".
    - ``<authors>`` is the first author, followed by "-<other_authors>" when present.
    - For an integer date the suffix is appended directly (e.g. ``1999a``);
      for any other date the suffix, when present, is attached with a hyphen.
    """
    if bibkey == "":
        return ""

    author_names = [bibkey.first_author]
    if bibkey.other_authors:
        author_names.append(bibkey.other_authors)
    authors = "-".join(author_names)

    if isinstance(bibkey.date, int):
        year = f"{bibkey.date}{bibkey.date_suffix}"
    elif bibkey.date_suffix:
        year = f"{bibkey.date}-{bibkey.date_suffix}"
    else:
        year = bibkey.date

    return f"{authors}:{year}"
@@ -0,0 +1,158 @@
1
+ import traceback
2
+ from typing import Tuple
3
+ from aletk.ResultMonad import Ok, Err
4
+ from aletk.utils import get_logger
5
+ from philoch_bib_sdk.logic.literals import TBasicPubState
6
+ from philoch_bib_sdk.logic.models import BibKeyAttr
7
+
8
+ lgr = get_logger(__name__)
9
+
10
+
11
+ def _parse_bibkey_author(text: str) -> Tuple[str, str]:
12
+
13
+ author_parts = text.split("-")
14
+
15
+ if len(author_parts) == 1:
16
+ first_author = author_parts[0]
17
+ other_authors = ""
18
+ elif len(author_parts) == 2:
19
+ first_author = author_parts[0]
20
+ other_authors = author_parts[1]
21
+ else:
22
+ raise ValueError(
23
+ f"Unexpected bibkey author parts in [[ {text} ]]. Found [[ {author_parts} ]]. Expected 1 author, or 2 authors separated by '-'."
24
+ )
25
+
26
+ return first_author, other_authors
27
+
28
+
29
+ def _parse_bibkey_date_int_part(text: str) -> Tuple[int | None, int | None]:
30
+
31
+ char_index_type_d = {i: (char, char.isdigit()) for i, char in enumerate(text)}
32
+
33
+ year_l: list[str] = []
34
+ int_breakpoint = None
35
+ for i, (char, is_digit) in char_index_type_d.items():
36
+ if is_digit:
37
+ year_l.append(char)
38
+ int_breakpoint = i
39
+ else:
40
+ break
41
+
42
+ if year_l != []:
43
+ year_int = int(f"{''.join(year_l)}")
44
+ else:
45
+ year_int = None
46
+
47
+ if year_int and len(f"{year_int}") > 4:
48
+ raise ValueError(f"Unexpected year value in '{text}': is not a valid year or publication state")
49
+
50
+ return year_int, int_breakpoint
51
+
52
+
53
def _parse_bibkey_date_suffix_part(
    date_parts: str, year_int: int | None, int_breakpoint: int | None
) -> Tuple[int | TBasicPubState, str]:
    """
    Resolve the date half of a bibkey into ``(date, date_suffix)``.

    Args:
        date_parts: The raw text after the ":" of the bibkey.
        year_int: Integer value of the leading digits of ``date_parts``, or None.
        int_breakpoint: Index of the last leading digit in ``date_parts``, or None.

    Returns:
        - When the text starts with digits: ``(year_int, <everything after the digits>)``.
        - Otherwise the text must be "unpub" or "forthcoming", optionally followed
          by "-<suffix>": ``(<publication state>, <suffix or "">)``.

    Raises:
        ValueError: if ``year_int`` and ``int_breakpoint`` disagree about whether
            digits were found, or if the non-numeric text is not a valid
            publication state (with an optional, non-empty suffix).
    """
    # Case 1. The first part of the year is a digit
    if int_breakpoint is not None:
        if year_int is None:
            raise ValueError(
                "Unexpected case! year_int is None but int_breakpoint is not None. This should not happen."
            )

        return year_int, date_parts[int_breakpoint + 1 :]

    if year_int is not None:
        # Mirror image of the check above; the message now states the condition
        # that actually holds here.
        raise ValueError("Unexpected case! year_int is not None but int_breakpoint is None. This should not happen.")

    # Case 2. first characters are non-digits
    # has to start with either "unpub" or "forthcoming" then
    invalid_state_message = f"Unexpected year value in '{date_parts}': it is not a valid publication state."

    if not date_parts.startswith(("forthcoming", "unpub")):
        raise ValueError(invalid_state_message)

    date_suffix_parts = date_parts.split("-")

    if len(date_suffix_parts) == 2:
        suffix = date_suffix_parts[1]
        if not suffix:
            raise ValueError(
                f"Unexpected year value in '{date_parts}': it is not a valid publication state. Expected a suffix after '-'."
            )
    elif len(date_suffix_parts) == 1:
        suffix = ""
    else:
        raise ValueError(invalid_state_message)

    # The prefix check above still lets through e.g. "unpublished"; require an exact match.
    pubstate: TBasicPubState = ""
    if date_suffix_parts[0] == "unpub":
        pubstate = "unpub"
    elif date_suffix_parts[0] == "forthcoming":
        pubstate = "forthcoming"
    else:
        raise ValueError(invalid_state_message)

    return pubstate, suffix
102
+
103
+
104
def parse_bibkey(text: str) -> Ok[BibKeyAttr] | Err:
    """
    Parse a bibkey of the form ``<authors>:<date>`` into a BibKeyAttr.

    ``<authors>`` is one author or two author parts joined by "-". ``<date>``
    starts either with a year (digits, optionally followed by a suffix) or with
    a publication state ("unpub" / "forthcoming", optionally followed by
    "-<suffix>").

    Returns:
        ``Ok`` wrapping the BibKeyAttr, or ``Err`` (error_type "BibkeyError")
        whose message names the offending text and the underlying exception,
        and whose error_trace carries the traceback.
    """

    try:
        bibkey_parts = text.split(":")
        if len(bibkey_parts) != 2:
            raise ValueError(
                f"Unexpected number of bibkey parts in [[ {text} ]]. Expected only two parts separated by ':'."
            )

        author_part, date_part = bibkey_parts

        # Parse the author part
        first_author, other_authors = _parse_bibkey_author(author_part)

        # Parse the date part: leading digits first, then the remainder
        year_int, int_breakpoint = _parse_bibkey_date_int_part(date_part)
        date, date_suffix = _parse_bibkey_date_suffix_part(date_part, year_int, int_breakpoint)

        return Ok(
            BibKeyAttr(
                first_author=first_author,
                other_authors=other_authors,
                date=date,
                date_suffix=date_suffix,
            )
        )

    except Exception as e:
        # Include the cause in the message so that callers that only surface
        # `message` can see why parsing failed.
        error_message = f"Could not parse bibkey for '{text}'. {e.__class__.__name__}: {e}"

        return Err(
            message=error_message,
            code=-1,
            error_type="BibkeyError",
            error_trace=f"{traceback.format_exc()}",
        )
145
+
146
+
147
def hard_parse_bibkey(text: str) -> BibKeyAttr:
    """
    Parse a bibkey and return the BibKeyAttr directly instead of a result wrapper.
    This is used for testing purposes only.

    Raises:
        ValueError: if `parse_bibkey` returns an Err for `text`.
    """
    result = parse_bibkey(text)

    if not isinstance(result, Err):
        return result.out

    raise ValueError(f"Could not hard parse '{text}' as bibkey: {result.message}")
@@ -0,0 +1,37 @@
1
+ from typing import Literal
2
+ from philoch_bib_sdk.logic.models import VALID_DATE_FORMATS, BibItemDateAttr
3
+
4
+
5
def format_date(date: BibItemDateAttr | Literal["no date"]) -> str:
    """
    Render a date as text.

    Supported shapes:
    - "no date" is returned unchanged
    - year only -> "<year>"
    - year, month and day -> "<year>-<MM>-<DD>" (month and day zero-padded to two digits)
    - year and month -> "<year>-<MM>"
    - year with a hyphen second part -> "<year>-<part>"
    - year with a slash second part -> "<year>/<part>"

    Raises:
        ValueError: for any other combination of fields, or for a value that is
            neither "no date" nor a BibItemDateAttr.
    """
    if date == "no date":
        return "no date"

    if isinstance(date, BibItemDateAttr):
        year = date.year
        hyphen_part = date.year_part_2_hyphen
        slash_part = date.year_part_2_slash
        month = date.month
        day = date.day

        if hyphen_part is None and slash_part is None:
            # Plain date: a year, optionally refined by month or by month and day.
            if month is None and day is None:
                return str(year)
            if month is not None and day is not None:
                return f"{year}-{str(month).zfill(2)}-{str(day).zfill(2)}"
            if month is not None:
                return f"{year}-{str(month).zfill(2)}"

        elif month is None and day is None:
            # Two-part year: exactly one of the second parts may be set.
            if slash_part is None:
                return f"{year}-{hyphen_part}"
            if hyphen_part is None:
                return f"{year}/{slash_part}"

    raise ValueError(f"Invalid date format. Expected one of {', '.join(VALID_DATE_FORMATS)}, but found '{date}'.")
@@ -0,0 +1,62 @@
1
+ from aletk.utils import remove_extra_whitespace, get_logger
2
+ from aletk.ResultMonad import Ok, Err
3
+ from typing import Literal
4
+ from philoch_bib_sdk.logic.models import VALID_DATE_FORMATS, BibItemDateAttr
5
+
6
+
7
+ lgr = get_logger(__name__)
8
+
9
+
10
def _parse_date(text: str) -> BibItemDateAttr | Literal["no date"]:
    """
    Turn a date string into a BibItemDateAttr, or "no date".

    Recognized inputs, after whitespace normalization:
    - "no date" (any casing)
    - "<year>"
    - "<year>-<year2>" -> hyphen second part
    - "<year>/<year2>" -> slash second part
    - three parts containing a hyphen, where the second and third part are at
      most two characters long -> year, month, day

    Raises:
        ValueError: if the text matches none of the shapes above, or if a part
            is not an integer.
    """
    text = remove_extra_whitespace(text)

    if remove_extra_whitespace(text).lower() == "no date":
        return "no date"

    # Hyphens and slashes both act as delimiters; which one was used is
    # recovered from the text itself below.
    pieces = text.replace("-", "/").split("/")
    piece_count = len(pieces)

    year: int
    hyphen_part: int | None = None
    slash_part: int | None = None
    month: int | None = None
    day: int | None = None

    if piece_count == 1:
        year = int(pieces[0])

    elif piece_count == 2 and "-" in text:
        year, hyphen_part = int(pieces[0]), int(pieces[1])

    elif piece_count == 2 and "/" in text:
        year, slash_part = int(pieces[0]), int(pieces[1])

    elif piece_count == 3 and "-" in text and len(pieces[1]) <= 2 and len(pieces[2]) <= 2:
        year, month, day = int(pieces[0]), int(pieces[1]), int(pieces[2])

    else:
        raise ValueError(f"Invalid date format found in '{text}'. Expected one of {', '.join(VALID_DATE_FORMATS)}.")

    return BibItemDateAttr(
        year=year, year_part_2_hyphen=hyphen_part, year_part_2_slash=slash_part, month=month, day=day
    )
45
+
46
+
47
def parse_date(text: str) -> Ok[BibItemDateAttr | Literal["no date"]] | Err:
    """
    Parse a single date string into a BibItemDateAttr object (or "no date").

    Accepted inputs are "no date", '<year>', '<year>-<year2>', '<year>/<year2>'
    and '<year>-<month>-<day>'. Note that a two-part hyphenated value is read as
    a year with a second year part, not as '<year>-<month>'.

    Returns:
        ``Ok`` wrapping the parsed date, or ``Err`` whose error_type is the name
        of the exception class, whose message includes the exception text, and
        whose error_trace carries the traceback.
    """
    import traceback  # local import: keeps this block self-contained

    try:
        return Ok(_parse_date(text))

    except Exception as e:
        error_message = f"Error parsing date from '{text}': {e}"
        return Err(
            message=error_message,
            code=-1,
            error_type=f"{e.__class__.__name__}",
            # Keep the traceback, as the other field parsers do, instead of discarding it.
            error_trace=traceback.format_exc(),
        )
@@ -0,0 +1,182 @@
1
+ from typing import TypedDict
2
+ from aletk.utils import get_logger
3
+
4
+ from philoch_bib_sdk.converters.plaintext.author.formatter import format_author
5
+ from philoch_bib_sdk.converters.plaintext.bib_string_formatter import format_bib_string_attr
6
+ from philoch_bib_sdk.converters.plaintext.bibitem.bibkey_formatter import format_bibkey
7
+ from philoch_bib_sdk.converters.plaintext.bibitem.date_formatter import format_date
8
+ from philoch_bib_sdk.converters.plaintext.bibitem.pages_formatter import format_pages
9
+ from philoch_bib_sdk.converters.plaintext.journal.formatter import format_journal
10
+ from philoch_bib_sdk.logic.literals import TBibTeXEntryType
11
+ from philoch_bib_sdk.logic.models import BibItem
12
+
13
+
14
+ lgr = get_logger(__name__)
15
+
16
+
17
def format_entry_type(entry_type: TBibTeXEntryType) -> str:
    """
    Render the entry type of a BibItem.

    "UNKNOWN" is returned as is, any other non-empty value is prefixed with "@",
    and an empty value renders as "".
    """
    if entry_type == "UNKNOWN":
        return "UNKNOWN"

    return f"@{entry_type}" if entry_type else ""
28
+
29
+
30
class FormattedBibItem(TypedDict):
    """
    Flat, all-string representation of a bibliography item.

    Every key is required and every value is a string. Keys are declared in the
    order in which `format_bibitem` emits them.
    """

    # Workflow notes
    _to_do_general: str
    _change_request: str

    # Identity and contributors
    entry_type: str
    bibkey: str
    author: str
    _author_ids: str
    editor: str
    _editor_ids: str
    author_ids: str
    options: str
    shorthand: str

    # Date and titles
    date: str
    pubstate: str
    title: str
    _title_unicode: str
    booktitle: str
    crossref: str

    # Journal / container details
    journal: str
    journal_id: str
    volume: str
    number: str
    pages: str
    eid: str
    series: str

    # Publishing details
    address: str
    institution: str
    school: str
    publisher: str
    publisher_id: str
    type: str
    edition: str
    note: str
    _issuetitle: str
    _guesteditor: str
    _extra_note: str

    # Identifiers and links
    urn: str
    eprint: str
    doi: str
    url: str

    # Classification
    _kw_level1: str
    _kw_level2: str
    _kw_level3: str
    _epoch: str
    _person: str
    _comm_for_profile_bib: str
    _langid: str
    _lang_der: str

    # Cross references
    _further_refs: str
    _depends_on: str

    # Internal bookkeeping
    _dltc_num: str
    _spec_interest: str
    _note_perso: str
    _note_stock: str
    _note_status: str
    _num_inwork_coll: str
    _num_inwork: str
    _num_coll: str
    _dltc_copyediting_note: str
    _note_missing: str
    _num_sort: str
91
+
92
+
93
def format_bibitem(bibitem: BibItem) -> FormattedBibItem:
    """
    Flatten a BibItem into a FormattedBibItem (a dict of plain strings).

    - Authors, editors, guest editors and the `_person` are rendered with
      `format_author` using the "latex" variant.
    - BibStringAttr fields are rendered in their "latex" variant; the title is
      additionally emitted in its "unicode" variant as `_title_unicode`.
    - Optional numeric fields render as "" when they are None.
    - The ID columns (`_author_ids`, `_editor_ids`, `author_ids`, `journal_id`,
      `publisher_id`) are always emitted as empty strings.
    """

    bibkey = format_bibkey(bibitem.bibkey)

    author = format_author(bibitem.author, "latex")
    editor = format_author(bibitem.editor, "latex")
    person = format_author((bibitem._person,), "latex") if bibitem._person else ""

    # format_author already returns the finished string; joining its result
    # again would interleave ", " between every single character.
    guesteditor = format_author(tuple(bibitem._guesteditor), "latex")

    shorthand = ", ".join([a.mononym.latex for a in bibitem.author if a.mononym.latex])
    date = format_date(bibitem.date)

    pages = format_pages(bibitem.pages)

    journal = format_journal(bibitem.journal, "latex")

    crossref = format_bibkey(bibitem.crossref.bibkey) if bibitem.crossref else ""

    kw_level1, kw_level2, kw_level3 = (
        bibitem._kws.level_1.name if bibitem._kws else "",
        bibitem._kws.level_2.name if bibitem._kws else "",
        bibitem._kws.level_3.name if bibitem._kws else "",
    )

    further_refs = ", ".join([format_bibkey(ref) for ref in bibitem._further_refs])
    depends_on = ", ".join([format_bibkey(dep) for dep in bibitem._depends_on])

    formatted: FormattedBibItem = {
        "_to_do_general": bibitem._to_do_general,
        "_change_request": bibitem._change_request,
        "entry_type": format_entry_type(bibitem.entry_type),
        "bibkey": bibkey,
        "author": author,
        "_author_ids": "",
        "editor": editor,
        "_editor_ids": "",
        "author_ids": "",
        "options": ", ".join(bibitem.options),
        "shorthand": shorthand,
        "date": date,
        "pubstate": bibitem.pubstate,
        "title": format_bib_string_attr(bibitem.title, "latex"),
        "_title_unicode": format_bib_string_attr(bibitem.title, "unicode"),
        "booktitle": format_bib_string_attr(bibitem.booktitle, "latex"),
        "crossref": crossref,
        "journal": journal,
        "journal_id": "",
        "volume": bibitem.volume,
        "number": bibitem.number,
        "pages": pages,
        "eid": bibitem.eid,
        "series": format_bib_string_attr(bibitem.series.name, "latex") if bibitem.series else "",
        "address": format_bib_string_attr(bibitem.address, "latex"),
        "institution": format_bib_string_attr(bibitem.institution, "latex"),
        "school": format_bib_string_attr(bibitem.school, "latex"),
        "publisher": format_bib_string_attr(bibitem.publisher, "latex"),
        "publisher_id": "",
        "type": format_bib_string_attr(bibitem.type, "latex"),
        "edition": str(bibitem.edition) if bibitem.edition is not None else "",
        "note": format_bib_string_attr(bibitem.note, "latex"),
        "_issuetitle": format_bib_string_attr(bibitem.issuetitle, "latex") if bibitem.issuetitle else "",
        "_guesteditor": guesteditor,
        "_extra_note": format_bib_string_attr(bibitem._extra_note, "latex") if bibitem._extra_note else "",
        "urn": bibitem.urn,
        "eprint": bibitem.eprint,
        "doi": bibitem.doi,
        "url": bibitem.url,
        "_kw_level1": kw_level1,
        "_kw_level2": kw_level2,
        "_kw_level3": kw_level3,
        "_epoch": bibitem._epoch,
        "_person": person,
        "_comm_for_profile_bib": bibitem._comm_for_profile_bib,
        "_langid": bibitem._langid,
        "_lang_der": bibitem._lang_der,
        "_further_refs": further_refs,
        "_depends_on": depends_on,
        "_dltc_num": str(bibitem._dltc_num) if bibitem._dltc_num is not None else "",
        "_spec_interest": bibitem._spec_interest,
        "_note_perso": bibitem._note_perso,
        "_note_stock": bibitem._note_stock,
        "_note_status": bibitem._note_status,
        "_num_inwork_coll": str(bibitem._num_inwork_coll) if bibitem._num_inwork_coll is not None else "",
        "_num_inwork": bibitem._num_inwork,
        "_num_coll": str(bibitem._num_coll) if bibitem._num_coll is not None else "",
        "_dltc_copyediting_note": bibitem._dltc_copyediting_note,
        "_note_missing": bibitem._note_missing,
        "_num_sort": str(bibitem._num_sort) if bibitem._num_sort is not None else "",
    }

    return formatted
@@ -0,0 +1,13 @@
1
+ from typing import Tuple
2
+ from philoch_bib_sdk.logic.models import PageAttr
3
+
4
+
5
def _pages_single_str(page_pair: PageAttr) -> str:
    """
    Render one page entry: "<start>--<end>" for a range, or just "<start>"
    when the entry has no end page.
    """
    if not page_pair.end:
        return page_pair.start

    return page_pair.start + "--" + page_pair.end
7
+
8
+
9
def format_pages(pages: Tuple[PageAttr, ...]) -> str:
    """
    Render page entries as a ", "-separated string.

    Each entry is rendered as "<start>--<end>", or as "<start>" when it has no
    end page. An empty collection renders as "".
    """
    # Truthiness instead of `pages is tuple()`: an identity test only works
    # because CPython happens to share a single empty-tuple object.
    if not pages:
        return ""

    return ", ".join(_pages_single_str(page_pair) for page_pair in pages)
@@ -0,0 +1,63 @@
1
+ import re
2
+ import traceback
3
+ from typing import Tuple
4
+ from aletk.utils import remove_extra_whitespace
5
+ from aletk.ResultMonad import Ok, Err
6
+
7
+ from philoch_bib_sdk.logic.models import PageAttr
8
+
9
+
10
def is_valid_roman(raw_str: str) -> bool:
    """
    Return True if `raw_str` is a well-formed roman numeral (case-insensitive),
    covering values up to MMMCMXCIX.

    The empty string is not a numeral, and the whole string must match: trailing
    characters, including a trailing newline, make it invalid.

    TODO: TBD, decide if we want to control if the pages are in roman numbers.
    """
    if not raw_str:
        # Every group of the pattern is optional, so "" would otherwise match.
        return False

    pattern = r"M{0,3}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})"
    return re.fullmatch(pattern, raw_str.upper()) is not None
17
+
18
+
19
def _parse_single_page_attr(
    text: str,
) -> PageAttr:
    """
    Parse one page entry, either a range "<start>--<end>" or a single "<page>".

    A single page yields a PageAttr whose end is "".

    Raises:
        ValueError: if the text uses a single "-" instead of "--", or if
            splitting on "--" does not give exactly two parts.
    """
    if "--" in text:
        pieces = remove_extra_whitespace(text).split("--")

        if len(pieces) != 2:
            raise ValueError(f"Unexpected number of page parts found in '{text}': '{pieces}'. Expected exactly 2.")

        first_page, last_page = (remove_extra_whitespace(piece) for piece in pieces)
        return PageAttr(start=first_page, end=last_page)

    if "-" in text:
        # A lone hyphen is rejected; ranges must be written with "--".
        raise ValueError(f"Unexpected page format found in '{text}'. Expected either '<start>--<end>' or '<page>'.")

    return PageAttr(start=remove_extra_whitespace(text), end="")
40
+
41
+
42
def parse_pages(text: str) -> Ok[Tuple[PageAttr, ...]] | Err:
    """
    Parse a pages field into a tuple of PageAttr objects.

    The field is a comma-separated list in which every entry is either
    "<start>--<end>" or "<page>". An empty string yields an empty tuple.

    Returns:
        ``Ok`` wrapping the parsed entries, or ``Err`` whose error_type is the
        name of the exception class and whose error_trace carries the traceback.
    """
    try:
        if text == "":
            return Ok(())

        page_attrs = tuple(
            _parse_single_page_attr(remove_extra_whitespace(chunk)) for chunk in text.split(",")
        )
        return Ok(page_attrs)

    except Exception as e:
        return Err(
            f"Error parsing pages from '{text}': {e}",
            code=-1,
            error_type=f"{e.__class__.__name__}",
            error_trace=traceback.format_exc(),
        )