philoch-bib-sdk 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- philoch_bib_sdk/adapters/plaintext/bibitem_reader.py +0 -0
- philoch_bib_sdk/adapters/tabular_data/read_journal_volume_number_index.py +10 -9
- philoch_bib_sdk/converters/plaintext/bibitem/bibkey_parser.py +14 -0
- philoch_bib_sdk/converters/plaintext/bibitem/parser.py +409 -1
- philoch_bib_sdk/logic/functions/journal_article_matcher.py +7 -3
- philoch_bib_sdk/logic/literals.py +15 -7
- {philoch_bib_sdk-0.1.4.dist-info → philoch_bib_sdk-0.1.6.dist-info}/METADATA +5 -2
- {philoch_bib_sdk-0.1.4.dist-info → philoch_bib_sdk-0.1.6.dist-info}/RECORD +11 -10
- {philoch_bib_sdk-0.1.4.dist-info → philoch_bib_sdk-0.1.6.dist-info}/WHEEL +1 -1
- {philoch_bib_sdk-0.1.4.dist-info → philoch_bib_sdk-0.1.6.dist-info}/entry_points.txt +0 -0
- {philoch_bib_sdk-0.1.4.dist-info → philoch_bib_sdk-0.1.6.dist-info/licenses}/LICENSE +0 -0
|
File without changes
|
|
@@ -1,21 +1,20 @@
|
|
|
1
1
|
from functools import partial
|
|
2
2
|
from typing import Callable, NamedTuple
|
|
3
3
|
|
|
4
|
+
from philoch_bib_sdk.converters.plaintext.bibitem.bibkey_parser import hard_parse_bibkey, parse_bibkey
|
|
4
5
|
from philoch_bib_sdk.logic.functions.journal_article_matcher import (
|
|
5
|
-
TBibkey,
|
|
6
6
|
TJournalBibkeyIndex,
|
|
7
|
-
TJournalName,
|
|
8
|
-
TNumber,
|
|
9
7
|
TReadIndex,
|
|
10
|
-
TVolume,
|
|
11
8
|
)
|
|
12
9
|
|
|
10
|
+
from aletk.ResultMonad import Err
|
|
11
|
+
|
|
13
12
|
|
|
14
13
|
class ColumnNames(NamedTuple):
|
|
15
|
-
bibkey:
|
|
16
|
-
journal:
|
|
17
|
-
volume:
|
|
18
|
-
number:
|
|
14
|
+
bibkey: str
|
|
15
|
+
journal: str
|
|
16
|
+
volume: str
|
|
17
|
+
number: str
|
|
19
18
|
|
|
20
19
|
|
|
21
20
|
def _read_from_ods(
|
|
@@ -50,7 +49,9 @@ def _read_from_ods(
|
|
|
50
49
|
)
|
|
51
50
|
|
|
52
51
|
return {
|
|
53
|
-
(row[column_names.journal], row[column_names.volume], row[column_names.number]):
|
|
52
|
+
(row[column_names.journal], row[column_names.volume], row[column_names.number]): hard_parse_bibkey(
|
|
53
|
+
row[column_names.bibkey]
|
|
54
|
+
)
|
|
54
55
|
for row in df.to_dicts()
|
|
55
56
|
}
|
|
56
57
|
|
|
@@ -142,3 +142,17 @@ def parse_bibkey(text: str) -> Ok[BibKeyAttr] | Err:
|
|
|
142
142
|
error_type="BibkeyError",
|
|
143
143
|
error_trace=f"{traceback.format_exc()}",
|
|
144
144
|
)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def hard_parse_bibkey(text: str) -> BibKeyAttr:
    """
    Parse a bibkey, raising instead of returning an ``Err``.

    Thin wrapper around ``parse_bibkey`` for callers that treat an unparsable
    bibkey as a fatal error rather than inspecting a result value (for example
    when building a lookup index from trusted tabular data).

    Args:
        text: The plain-text bibkey to parse.

    Returns:
        The parsed ``BibKeyAttr``.

    Raises:
        ValueError: If ``parse_bibkey`` returns an ``Err``; the parser's
            message is included in the exception text.
    """

    bibkey_parsed = parse_bibkey(text)

    if isinstance(bibkey_parsed, Err):
        raise ValueError(f"Could not hard parse '{text}' as bibkey: {bibkey_parsed.message}")

    return bibkey_parsed.out
|
|
@@ -1,3 +1,411 @@
|
|
|
1
|
-
|
|
1
|
+
import traceback
|
|
2
|
+
from typing import Tuple, Literal, TypedDict, TypeGuard, Any
|
|
3
|
+
from aletk.ResultMonad import Ok, Err
|
|
4
|
+
from aletk.utils import get_logger, remove_extra_whitespace
|
|
5
|
+
from philoch_bib_sdk.converters.plaintext.author.parser import parse_author
|
|
6
|
+
from philoch_bib_sdk.converters.plaintext.bibitem.bibkey_parser import parse_bibkey
|
|
7
|
+
from philoch_bib_sdk.converters.plaintext.bibitem.date_parser import parse_date
|
|
8
|
+
from philoch_bib_sdk.converters.plaintext.bibitem.pages_parser import parse_pages
|
|
9
|
+
from philoch_bib_sdk.converters.plaintext.journal.parser import parse_journal
|
|
10
|
+
from philoch_bib_sdk.logic.literals import (
|
|
11
|
+
TBibTeXEntryType,
|
|
12
|
+
TPubState,
|
|
13
|
+
TEpoch,
|
|
14
|
+
TLanguageID,
|
|
15
|
+
BIBTEX_ENTRY_TYPE_VALUES,
|
|
16
|
+
PUB_STATE_VALUES,
|
|
17
|
+
EPOCH_VALUES,
|
|
18
|
+
LANGUAGE_ID_VALUES,
|
|
19
|
+
)
|
|
20
|
+
from philoch_bib_sdk.logic.models import (
|
|
21
|
+
BibItem,
|
|
22
|
+
BibStringAttr,
|
|
23
|
+
BibKeyAttr,
|
|
24
|
+
Author,
|
|
25
|
+
PageAttr,
|
|
26
|
+
BibItemDateAttr,
|
|
27
|
+
BaseNamedRenderable,
|
|
28
|
+
KeywordsAttr,
|
|
29
|
+
Keyword,
|
|
30
|
+
TBibString,
|
|
31
|
+
)
|
|
2
32
|
|
|
3
33
|
lgr = get_logger(__name__)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class ParsedBibItemData(TypedDict, total=False):
    """
    Raw, string-valued input accepted by ``parse_bibitem``.

    Every key is optional (``total=False``) and every value is an unparsed
    string. Some declared keys (``_author_ids``, ``_editor_ids``,
    ``author_ids``, ``shorthand``, ``_title_unicode``, ``crossref``,
    ``journal_id``, ``publisher_id``) are accepted here but not read by
    ``parse_bibitem``.
    """

    # --- Editorial bookkeeping ---
    _to_do_general: str
    _change_request: str

    # --- Identity and contributors ---
    entry_type: str
    bibkey: str
    author: str
    _author_ids: str
    editor: str
    _editor_ids: str
    # NOTE(review): looks like a duplicate of `_author_ids` without the underscore - confirm
    author_ids: str
    options: str
    shorthand: str

    # --- Date and titles ---
    date: str
    pubstate: str
    title: str
    _title_unicode: str
    booktitle: str
    crossref: str

    # --- Journal / container information ---
    journal: str
    journal_id: str
    volume: str
    number: str
    pages: str
    eid: str
    series: str

    # --- Publication details ---
    address: str
    institution: str
    school: str
    publisher: str
    publisher_id: str
    type: str
    edition: str
    note: str
    _issuetitle: str
    _guesteditor: str
    _extra_note: str

    # --- Identifiers and links ---
    urn: str
    eprint: str
    doi: str
    url: str

    # --- Classification ---
    _kw_level1: str
    _kw_level2: str
    _kw_level3: str
    _epoch: str
    _person: str
    _comm_for_profile_bib: str
    _langid: str
    _lang_der: str

    # --- Cross references (comma-separated bibkeys) ---
    _further_refs: str
    _depends_on: str

    # --- Project-internal numbers and notes ---
    _dltc_num: str
    _spec_interest: str
    _note_perso: str
    _note_stock: str
    _note_status: str
    _num_inwork_coll: str
    _num_inwork: str
    _num_coll: str
    _dltc_copyediting_note: str
    _note_missing: str
    _num_sort: str
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _is_valid_bibtex_entry_type(value: Any) -> TypeGuard[TBibTeXEntryType]:
    """
    Tell whether ``value`` is one of the known BibTeX entry type literals.

    Only strings contained in ``BIBTEX_ENTRY_TYPE_VALUES`` are accepted; a
    positive answer narrows the value's type to ``TBibTeXEntryType``.
    """
    if not isinstance(value, str):
        return False
    return value in BIBTEX_ENTRY_TYPE_VALUES
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _parse_entry_type(text: str) -> TBibTeXEntryType:
    """
    Parse a BibTeX entry type from a string.

    Surrounding whitespace and a single leading ``@`` are ignored. An exact
    match against the known entry types is tried first; if that fails, the
    lower-cased spelling is tried, since BibTeX entry types are conventionally
    case-insensitive (``"@Article"`` resolves to ``"article"``).

    Args:
        text: The raw entry type, e.g. ``"article"`` or ``"@book"``.

    Returns:
        The matching entry type, or ``"UNKNOWN"`` when the input is empty or
        not recognised.
    """
    candidate = text.strip().removeprefix("@").strip()

    # Empty input is always unknown, regardless of the configured literal values
    if not candidate:
        return "UNKNOWN"

    # Exact match first, so already-canonical values behave exactly as before
    if _is_valid_bibtex_entry_type(candidate):
        return candidate

    lowered = candidate.lower()
    if _is_valid_bibtex_entry_type(lowered):
        return lowered

    return "UNKNOWN"
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _parse_options(text: str) -> Tuple[str, ...]:
    """
    Split a comma-separated options string into a tuple of cleaned options.

    Entries that are empty or whitespace-only are dropped; the remaining ones
    are passed through ``remove_extra_whitespace``. Empty input yields ``()``.
    """
    if not text:
        return ()

    cleaned: list[str] = []
    for raw_option in text.split(","):
        if not raw_option.strip():
            continue
        cleaned.append(remove_extra_whitespace(raw_option))

    return tuple(cleaned)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _parse_bibkey_list(text: str) -> Tuple[BibKeyAttr, ...]:
    """
    Turn a comma-separated string of bibkeys into a tuple of ``BibKeyAttr``.

    Each entry is cleaned with ``remove_extra_whitespace``; entries that are
    empty afterwards are skipped. Empty input yields ``()``.

    Raises:
        ValueError: If any non-empty entry is rejected by ``parse_bibkey``.
    """
    if not text:
        return ()

    parsed: list[BibKeyAttr] = []
    for chunk in text.split(","):
        candidate = remove_extra_whitespace(chunk)
        if not candidate:
            continue

        outcome = parse_bibkey(candidate)
        if not isinstance(outcome, Ok):
            raise ValueError(f"Failed to parse bibkey '{candidate}': {outcome.message}")
        parsed.append(outcome.out)

    return tuple(parsed)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _parse_keywords(level1: str, level2: str, level3: str) -> KeywordsAttr | None:
    """
    Build a ``KeywordsAttr`` from the three keyword level strings.

    Args:
        level1: First-level keyword name (may be empty).
        level2: Second-level keyword name (may be empty).
        level3: Third-level keyword name (may be empty).

    Returns:
        ``None`` when all three levels are empty; otherwise a ``KeywordsAttr``
        in which each level is a ``Keyword`` without an id, using an empty
        name for levels that were not provided.
    """
    if not (level1 or level2 or level3):
        return None

    # `or ""` normalises any falsy level to the empty name
    return KeywordsAttr(
        level_1=Keyword(name=level1 or "", id=None),
        level_2=Keyword(name=level2 or "", id=None),
        level_3=Keyword(name=level3 or "", id=None),
    )
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _is_valid_pubstate(value: Any) -> TypeGuard[TPubState]:
    """
    Tell whether ``value`` is one of the known publication state literals.

    Only strings contained in ``PUB_STATE_VALUES`` are accepted; a positive
    answer narrows the value's type to ``TPubState``.
    """
    if not isinstance(value, str):
        return False
    return value in PUB_STATE_VALUES
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def _parse_pubstate(text: str) -> TPubState:
    """
    Return ``text`` when it is a known publication state, otherwise ``""``.
    """
    return text if _is_valid_pubstate(text) else ""
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def _is_valid_epoch(value: Any) -> TypeGuard[TEpoch]:
    """
    Tell whether ``value`` is one of the known epoch literals.

    Only strings contained in ``EPOCH_VALUES`` are accepted; a positive answer
    narrows the value's type to ``TEpoch``.
    """
    if not isinstance(value, str):
        return False
    return value in EPOCH_VALUES
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _parse_epoch(text: str) -> TEpoch:
    """
    Return ``text`` when it is a known epoch, otherwise ``""``.
    """
    return text if _is_valid_epoch(text) else ""
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _is_valid_language_id(value: Any) -> TypeGuard[TLanguageID]:
    """
    Tell whether ``value`` is one of the known language ID literals.

    Only strings contained in ``LANGUAGE_ID_VALUES`` are accepted; a positive
    answer narrows the value's type to ``TLanguageID``.
    """
    if not isinstance(value, str):
        return False
    return value in LANGUAGE_ID_VALUES
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _parse_language_id(text: str) -> TLanguageID:
    """
    Return ``text`` when it is a known language ID, otherwise ``""``.
    """
    return text if _is_valid_language_id(text) else ""
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _create_bibstring_attr(value: str, bibstring_type: TBibString) -> BibStringAttr:
|
|
219
|
+
"""
|
|
220
|
+
Create a BibStringAttr with the correct field set based on bibstring_type.
|
|
221
|
+
"""
|
|
222
|
+
if bibstring_type == "latex":
|
|
223
|
+
return BibStringAttr(latex=value)
|
|
224
|
+
elif bibstring_type == "unicode":
|
|
225
|
+
return BibStringAttr(unicode=value)
|
|
226
|
+
else: # simplified
|
|
227
|
+
return BibStringAttr(simplified=value)
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def _parse_optional_int(raw: str | None) -> int | None:
    """
    Convert an optional numeric field to ``int``; missing or blank input yields ``None``.

    Raises ``ValueError`` when the stripped text is not a valid integer.
    """
    if not raw:
        return None
    stripped = raw.strip()
    return int(stripped) if stripped else None


def _optional_bibstring(raw: str | None, bibstring_type: TBibString) -> BibStringAttr | Literal[""]:
    """
    Wrap a non-empty field in a ``BibStringAttr``; missing or empty input yields ``""``.
    """
    return _create_bibstring_attr(raw, bibstring_type) if raw else ""


def parse_bibitem(data: ParsedBibItemData, bibstring_type: TBibString = "latex") -> Ok[BibItem] | Err:
    """
    Parse a bibitem from a dictionary of string fields into a BibItem object.

    Args:
        data: Raw string fields; every key is optional.
        bibstring_type: Which ``BibStringAttr`` field receives textual values,
            and which variant the author/journal parsers are asked to produce.

    Returns:
        ``Ok(BibItem)`` on success. If one of the dedicated sub-parsers
        (bibkey, author, editor, guest editor, person, date, pages, journal)
        returns an ``Err``, that ``Err`` is returned unchanged. Any exception
        raised while parsing (e.g. a non-numeric ``edition`` or an invalid key
        in ``_further_refs``) is converted into an ``Err`` with
        ``error_type="BibItemParsingError"``.

    Notes:
        - Unrecognised ``pubstate``, ``_epoch`` and ``_langid`` values become ``""``;
          an unrecognised ``entry_type`` becomes ``"UNKNOWN"``.
        - ``crossref`` is not parsed yet and is always set to ``""``.
        - Only the first name parsed from ``_person`` is used.
        - A missing ``date`` defaults to ``BibItemDateAttr(year=0)``.
    """
    try:
        # Parse bibkey
        bibkey = None
        if data.get("bibkey"):
            bibkey_result = parse_bibkey(data["bibkey"])
            if isinstance(bibkey_result, Err):
                return bibkey_result
            bibkey = bibkey_result.out

        # Parse authors
        authors: tuple[Author, ...] = ()
        if data.get("author"):
            author_result = parse_author(data["author"], bibstring_type)
            if isinstance(author_result, Err):
                return author_result
            authors = author_result.out

        # Parse editors
        editors: tuple[Author, ...] = ()
        if data.get("editor"):
            editor_result = parse_author(data["editor"], bibstring_type)
            if isinstance(editor_result, Err):
                return editor_result
            editors = editor_result.out

        # Parse guest editors
        guesteditors: tuple[Author, ...] = ()
        if data.get("_guesteditor"):
            guesteditor_result = parse_author(data["_guesteditor"], bibstring_type)
            if isinstance(guesteditor_result, Err):
                return guesteditor_result
            guesteditors = guesteditor_result.out

        # Parse person (only the first parsed name is kept)
        person = None
        if data.get("_person"):
            person_result = parse_author(data["_person"], bibstring_type)
            if isinstance(person_result, Err):
                return person_result
            if person_result.out:
                person = person_result.out[0]

        # Parse date
        date: BibItemDateAttr | Literal["no date"] = BibItemDateAttr(year=0)
        if data.get("date"):
            date_result = parse_date(data["date"])
            if isinstance(date_result, Err):
                return date_result
            date = date_result.out

        # Parse pages
        pages: tuple[PageAttr, ...] = ()
        if data.get("pages"):
            pages_result = parse_pages(data["pages"])
            if isinstance(pages_result, Err):
                return pages_result
            pages = pages_result.out

        # Parse journal
        journal = None
        if data.get("journal"):
            journal_result = parse_journal(data["journal"], bibstring_type)
            if isinstance(journal_result, Err):
                return journal_result
            journal = journal_result.out

        # Parse crossref - for now, we'll skip complex crossref parsing and set to empty string
        # TODO: Implement proper crossref parsing if needed

        # Parse further_refs and depends_on (a bad key raises and is reported via the except below)
        further_refs = _parse_bibkey_list(data.get("_further_refs", ""))
        depends_on = _parse_bibkey_list(data.get("_depends_on", ""))

        # Parse keywords
        keywords = _parse_keywords(data.get("_kw_level1", ""), data.get("_kw_level2", ""), data.get("_kw_level3", ""))

        # Parse numeric fields; the order is kept so the first invalid one is the one reported
        edition = _parse_optional_int(data.get("edition"))
        dltc_num = _parse_optional_int(data.get("_dltc_num"))
        num_inwork_coll = _parse_optional_int(data.get("_num_inwork_coll"))
        num_coll = _parse_optional_int(data.get("_num_coll"))
        num_sort = _parse_optional_int(data.get("_num_sort"))

        # Parse series
        series: BaseNamedRenderable | Literal[""] = ""
        if data.get("series"):
            series_attr = _create_bibstring_attr(data["series"], bibstring_type)
            series = BaseNamedRenderable(name=series_attr, id=None)

        # Create BibItem
        bibitem = BibItem(
            to_do_general=data.get("_to_do_general", ""),
            change_request=data.get("_change_request", ""),
            entry_type=_parse_entry_type(data.get("entry_type", "")),
            bibkey=bibkey or "",
            author=authors,
            editor=editors,
            options=_parse_options(data.get("options", "")),
            date=date,
            pubstate=_parse_pubstate(data.get("pubstate", "")),
            title=_optional_bibstring(data.get("title"), bibstring_type),
            booktitle=_optional_bibstring(data.get("booktitle"), bibstring_type),
            crossref="",
            journal=journal,
            volume=data.get("volume", ""),
            number=data.get("number", ""),
            pages=pages,
            eid=data.get("eid", ""),
            series=series,
            address=_optional_bibstring(data.get("address"), bibstring_type),
            institution=_optional_bibstring(data.get("institution"), bibstring_type),
            school=_optional_bibstring(data.get("school"), bibstring_type),
            publisher=_optional_bibstring(data.get("publisher"), bibstring_type),
            type=_optional_bibstring(data.get("type"), bibstring_type),
            edition=edition,
            note=_optional_bibstring(data.get("note"), bibstring_type),
            issuetitle=_optional_bibstring(data.get("_issuetitle"), bibstring_type),
            guesteditor=guesteditors,
            extra_note=_optional_bibstring(data.get("_extra_note"), bibstring_type),
            urn=data.get("urn", ""),
            eprint=data.get("eprint", ""),
            doi=data.get("doi", ""),
            url=data.get("url", ""),
            kws=keywords or "",
            epoch=_parse_epoch(data.get("_epoch", "")),
            person=person or "",
            comm_for_profile_bib=data.get("_comm_for_profile_bib", ""),
            langid=_parse_language_id(data.get("_langid", "")),
            lang_der=data.get("_lang_der", ""),
            further_refs=further_refs,
            depends_on=depends_on,
            dltc_num=dltc_num,
            spec_interest=data.get("_spec_interest", ""),
            note_perso=data.get("_note_perso", ""),
            note_stock=data.get("_note_stock", ""),
            note_status=data.get("_note_status", ""),
            num_inwork_coll=num_inwork_coll,
            num_inwork=data.get("_num_inwork", ""),
            num_coll=num_coll,
            dltc_copyediting_note=data.get("_dltc_copyediting_note", ""),
            note_missing=data.get("_note_missing", ""),
            num_sort=num_sort,
        )

        return Ok(bibitem)

    except Exception as e:
        # Result-monad boundary: callers receive an Err instead of an exception
        return Err(
            message=f"Failed to parse bibitem: {e.__class__.__name__}: {e}",
            code=-1,
            error_type="BibItemParsingError",
            error_trace=traceback.format_exc(),
        )
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from typing import Callable, Dict, Tuple
|
|
2
2
|
from philoch_bib_sdk.converters.plaintext.journal.formatter import format_journal
|
|
3
|
-
from philoch_bib_sdk.logic.models import BibItem
|
|
3
|
+
from philoch_bib_sdk.logic.models import BibItem, BibKeyAttr
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
type TJournalName = str
|
|
@@ -12,14 +12,18 @@ type TNumber = str
|
|
|
12
12
|
type TBibkey = str
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
type TJournalBibkeyIndex = Dict[
|
|
15
|
+
type TJournalBibkeyIndex = Dict[
|
|
16
|
+
Tuple[TJournalName, TVolume, TNumber], BibKeyAttr
|
|
17
|
+
] # (journal, volume, number) # bibkey
|
|
16
18
|
|
|
17
19
|
|
|
18
|
-
def get_bibkey_by_journal_volume_number(index: TJournalBibkeyIndex, subject: BibItem) ->
|
|
20
|
+
def get_bibkey_by_journal_volume_number(index: TJournalBibkeyIndex, subject: BibItem) -> BibKeyAttr:
|
|
19
21
|
"""
|
|
20
22
|
Simple lookup of a Bibitem on an index for its bibkey, via the combination (journal_name, volume, number). Fails if any of the three fields are missing.
|
|
21
23
|
"""
|
|
22
24
|
|
|
25
|
+
# TODO: need to ensure the index is unique, possibly via some fuzzy matching with the title or the author
|
|
26
|
+
|
|
23
27
|
journal = format_journal(subject.journal, bibstring_type="latex")
|
|
24
28
|
volume = subject.volume
|
|
25
29
|
number = subject.number
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
from typing import Literal
|
|
1
|
+
from typing import Literal, Tuple, get_args
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
TBibTeXEntryType = Literal[
|
|
4
4
|
"article",
|
|
5
5
|
"book",
|
|
6
6
|
"incollection",
|
|
@@ -14,20 +14,22 @@ type TBibTeXEntryType = Literal[
|
|
|
14
14
|
"UNKNOWN",
|
|
15
15
|
]
|
|
16
16
|
|
|
17
|
-
|
|
17
|
+
TBasicPubState = Literal[
|
|
18
18
|
"",
|
|
19
19
|
"unpub",
|
|
20
20
|
"forthcoming",
|
|
21
21
|
]
|
|
22
22
|
|
|
23
|
-
|
|
24
|
-
|
|
23
|
+
TPubState = Literal[
|
|
24
|
+
"",
|
|
25
|
+
"unpub",
|
|
26
|
+
"forthcoming",
|
|
25
27
|
"inwork",
|
|
26
28
|
"submitted",
|
|
27
29
|
"published",
|
|
28
30
|
]
|
|
29
31
|
|
|
30
|
-
|
|
32
|
+
TLanguageID = Literal[
|
|
31
33
|
"",
|
|
32
34
|
"catalan",
|
|
33
35
|
"czech",
|
|
@@ -50,7 +52,7 @@ type TLanguageID = Literal[
|
|
|
50
52
|
"unknown",
|
|
51
53
|
]
|
|
52
54
|
|
|
53
|
-
|
|
55
|
+
TEpoch = Literal[
|
|
54
56
|
"",
|
|
55
57
|
"ancient-philosophy",
|
|
56
58
|
"ancient-scientists",
|
|
@@ -88,3 +90,9 @@ type TEpoch = Literal[
|
|
|
88
90
|
"theologians",
|
|
89
91
|
"vienna-circle",
|
|
90
92
|
]
|
|
93
|
+
|
|
94
|
+
# Literal value constants for runtime validation
|
|
95
|
+
BIBTEX_ENTRY_TYPE_VALUES: Tuple[TBibTeXEntryType, ...] = get_args(TBibTeXEntryType)
|
|
96
|
+
PUB_STATE_VALUES: Tuple[TPubState, ...] = get_args(TPubState)
|
|
97
|
+
EPOCH_VALUES: Tuple[TEpoch, ...] = get_args(TEpoch)
|
|
98
|
+
LANGUAGE_ID_VALUES: Tuple[TLanguageID, ...] = get_args(TLanguageID)
|
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: philoch-bib-sdk
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.6
|
|
4
4
|
Summary: Standard development kit for the Philosophie Bibliography project
|
|
5
5
|
License: MIT
|
|
6
|
+
License-File: LICENSE
|
|
6
7
|
Author: Luis Alejandro Bordo García
|
|
7
8
|
Author-email: luis.bordo@philosophie.ch
|
|
8
9
|
Maintainer: Luis Alejandro Bordo García
|
|
@@ -11,9 +12,11 @@ Requires-Python: >=3.13
|
|
|
11
12
|
Classifier: License :: OSI Approved :: MIT License
|
|
12
13
|
Classifier: Programming Language :: Python :: 3
|
|
13
14
|
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
14
16
|
Requires-Dist: aletk (>=0.1.6,<0.2.0)
|
|
15
17
|
Requires-Dist: attrs (>=25.3.0,<26.0.0)
|
|
16
18
|
Requires-Dist: polars (>=1.32.3,<2.0.0)
|
|
19
|
+
Requires-Dist: pydantic (>=2.11.9,<3.0.0)
|
|
17
20
|
Description-Content-Type: text/markdown
|
|
18
21
|
|
|
19
22
|
# Philosophie.ch Bibliography SDK
|
|
@@ -1,28 +1,29 @@
|
|
|
1
1
|
philoch_bib_sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
philoch_bib_sdk/adapters/
|
|
2
|
+
philoch_bib_sdk/adapters/plaintext/bibitem_reader.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
+
philoch_bib_sdk/adapters/tabular_data/read_journal_volume_number_index.py,sha256=Z_-R2qF2bna2qYy7E5iYOntAUm6vGbEV4lfH1fnQC0E,1858
|
|
3
4
|
philoch_bib_sdk/converters/latex.py,sha256=LuAKLrClECuBeaDQYJc7tIJECEV4h0kt0VE_ssv3s0o,236
|
|
4
5
|
philoch_bib_sdk/converters/plaintext/author/formatter.py,sha256=hsqKUyNhIZeqisnEQU43DokAfEfG78rgQ8POTjYnToM,965
|
|
5
6
|
philoch_bib_sdk/converters/plaintext/author/parser.py,sha256=LL12mtgN59eJCv551c6s7YfMTjfJAJqm-jRQkdntmIg,2514
|
|
6
7
|
philoch_bib_sdk/converters/plaintext/bib_string_formatter.py,sha256=5Z97u5GryHUgZcPhWE41thgWCB4wYu22pZ9et6nakmw,329
|
|
7
8
|
philoch_bib_sdk/converters/plaintext/bibitem/bibkey_formatter.py,sha256=YivsY0gblKJdC4yKYZ3tvWmKIvFXW4iNht9zhz8oFUs,565
|
|
8
|
-
philoch_bib_sdk/converters/plaintext/bibitem/bibkey_parser.py,sha256=
|
|
9
|
+
philoch_bib_sdk/converters/plaintext/bibitem/bibkey_parser.py,sha256=TKHFQ9QeZ2Jgm3sFCjTqz_PDfous0amvz3DB0AJA51E,4991
|
|
9
10
|
philoch_bib_sdk/converters/plaintext/bibitem/date_formatter.py,sha256=G2mbaJidDg8avKBbro1rVcEznPC92XVTDQ4fSdmvhJo,1480
|
|
10
11
|
philoch_bib_sdk/converters/plaintext/bibitem/date_parser.py,sha256=3ZYGhhGqILzrvnwOvG4NPAjErLwVva0dfsN0B9eFomg,2242
|
|
11
12
|
philoch_bib_sdk/converters/plaintext/bibitem/formatter.py,sha256=EjSwHYAPn0YRjeLGK_rCi26Wtug6X5x5DFEKPjStn30,6298
|
|
12
13
|
philoch_bib_sdk/converters/plaintext/bibitem/pages_formatter.py,sha256=punzwm8ObrLJhsCOS1oKHSnTXMX_R_0Xs9M866J44pU,397
|
|
13
14
|
philoch_bib_sdk/converters/plaintext/bibitem/pages_parser.py,sha256=mMFviMZo5qHs0K_SXfbmjJ_nbmTGnSiKMrXyazzO2Qs,2018
|
|
14
|
-
philoch_bib_sdk/converters/plaintext/bibitem/parser.py,sha256=
|
|
15
|
+
philoch_bib_sdk/converters/plaintext/bibitem/parser.py,sha256=v-ctkd5-9XwDSBlweRgUN7cuJeipWrxsLtE-GzZNeB4,13307
|
|
15
16
|
philoch_bib_sdk/converters/plaintext/journal/formatter.py,sha256=o5ikU-aNFr6cxgzD0rBCjymHLpGrD6RGvNE8V2sX52s,599
|
|
16
17
|
philoch_bib_sdk/converters/plaintext/journal/parser.py,sha256=kT1YHwc9Am82WHRhaSWXaCeKitPn9QLWIbmIe8T1of4,1092
|
|
17
18
|
philoch_bib_sdk/converters/plaintext/shared/renderable_formatter.py,sha256=oS5u8RJpkRXaDTmauVqZi-uuXsyG-UQZMK2pgzSk-qo,686
|
|
18
19
|
philoch_bib_sdk/logic/default_models.py,sha256=cHHKSFmNR29qBxQkPwelQ09sx66isHlAIr1PiIHAvH4,10467
|
|
19
20
|
philoch_bib_sdk/logic/functions/comparator.py,sha256=4G5EUEVf8v6URt1v1Fqk1pjqni6fxUs_Goh4EQ4RBJY,4034
|
|
20
|
-
philoch_bib_sdk/logic/functions/journal_article_matcher.py,sha256=
|
|
21
|
-
philoch_bib_sdk/logic/literals.py,sha256=
|
|
21
|
+
philoch_bib_sdk/logic/functions/journal_article_matcher.py,sha256=Twv_UCRCMHEHyroG29BQjvkq_SHM60rjynfIywqCS5E,1330
|
|
22
|
+
philoch_bib_sdk/logic/literals.py,sha256=RLzpN3pJu0XZhkEUpMObr6ql-BdpMtpTcWeNw6aYBP4,1901
|
|
22
23
|
philoch_bib_sdk/logic/models.py,sha256=xHCQWFq_rEcX967icALD4oOQjM8AlLKLzXQ8SP-YNis,8681
|
|
23
24
|
philoch_bib_sdk/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
24
|
-
philoch_bib_sdk-0.1.
|
|
25
|
-
philoch_bib_sdk-0.1.
|
|
26
|
-
philoch_bib_sdk-0.1.
|
|
27
|
-
philoch_bib_sdk-0.1.
|
|
28
|
-
philoch_bib_sdk-0.1.
|
|
25
|
+
philoch_bib_sdk-0.1.6.dist-info/METADATA,sha256=Po2I7HL2L0ZCkFyJFCzsisDP7Jp8rY73tDukdJY1Pug,931
|
|
26
|
+
philoch_bib_sdk-0.1.6.dist-info/WHEEL,sha256=M5asmiAlL6HEcOq52Yi5mmk9KmTVjY2RDPtO4p9DMrc,88
|
|
27
|
+
philoch_bib_sdk-0.1.6.dist-info/entry_points.txt,sha256=5PDcoKK00cdaL0CabioRUz08ZJeXLa94Ca-C0umGPTU,46
|
|
28
|
+
philoch_bib_sdk-0.1.6.dist-info/licenses/LICENSE,sha256=nplGobji9gkYmJxDBbBz2SKjZY27SUaqhqKkpUB-C30,1070
|
|
29
|
+
philoch_bib_sdk-0.1.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|