philoch-bib-sdk 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,69 @@
1
+ from functools import partial
2
+ from typing import Callable, NamedTuple
3
+
4
+ from philoch_bib_sdk.converters.plaintext.bibitem.bibkey_parser import hard_parse_bibkey, parse_bibkey
5
+ from philoch_bib_sdk.logic.functions.journal_article_matcher import (
6
+ TJournalBibkeyIndex,
7
+ TReadIndex,
8
+ )
9
+
10
+ from aletk.ResultMonad import Err
11
+
12
+
13
class ColumnNames(NamedTuple):
    """Names of the spreadsheet columns that hold each piece of index data."""

    # Header of the column containing the bibkey.
    bibkey: str
    # Header of the column containing the journal name.
    journal: str
    # Header of the column containing the volume.
    volume: str
    # Header of the column containing the issue number.
    number: str
18
+
19
+
20
def _read_from_ods(
    column_names: ColumnNames,
    file_path: str,
) -> TJournalBibkeyIndex:
    """
    Reads the specified columns from an ODS file and returns a TJournalBibkeyIndex dictionary.

    All four columns are read as strings. Every bibkey is parsed exactly once; if any
    of them fails to parse, no index is returned and all failures are reported together.

    Args:
        column_names (ColumnNames): The names of the columns to read (journal, volume, number, bibkey).
        file_path (str): The path to the ODS file.

    Returns:
        TJournalBibkeyIndex: A dictionary mapping (journal, volume, number) tuples to parsed bibkeys.
        If several rows share the same (journal, volume, number), the last row wins.

    Raises:
        ValueError: If the table is empty, or if at least one bibkey cannot be parsed.
    """
    # Function-scope import: polars is only imported when this reader is actually called.
    import polars as pl

    selected_columns = [column_names.journal, column_names.volume, column_names.number, column_names.bibkey]

    df = pl.read_ods(
        source=file_path,
        has_header=True,
        columns=selected_columns,
        # Force every selected column to text so the values are not type-inferred.
        schema_overrides={name: pl.Utf8 for name in selected_columns},
    )

    if df.is_empty():
        raise ValueError(
            f"Tabular data at '{file_path}' is empty or does not contain the expected columns: {column_names}"
        )

    index: TJournalBibkeyIndex = {}
    bibkey_errors = []

    for row in df.to_dicts():
        bibkey_parsed = parse_bibkey(str(row[column_names.bibkey]))

        if isinstance(bibkey_parsed, Err):
            # Keep going so that every bad bibkey is reported in a single error.
            bibkey_errors.append(bibkey_parsed)
            continue

        index[(row[column_names.journal], row[column_names.volume], row[column_names.number])] = bibkey_parsed.out

    if bibkey_errors:
        # Join the individual errors (not the characters of the list's repr).
        raise ValueError(
            f"Failed to parse bibkeys in the ODS file at '{file_path}': {' --- '.join(str(error) for error in bibkey_errors)}"
        )

    return index
66
+
67
+
68
+ type THOFReadFromOds = Callable[[ColumnNames], TReadIndex]
69
+ hof_read_from_ods: THOFReadFromOds = lambda column_names: partial(_read_from_ods, column_names)
@@ -142,3 +142,17 @@ def parse_bibkey(text: str) -> Ok[BibKeyAttr] | Err:
142
142
  error_type="BibkeyError",
143
143
  error_trace=f"{traceback.format_exc()}",
144
144
  )
145
+
146
+
147
def hard_parse_bibkey(text: str) -> BibKeyAttr:
    """
    Parse a bibkey and return the parsed value directly, raising on failure.

    This is the exception-raising counterpart of `parse_bibkey`: instead of
    returning an `Err`, a failed parse is turned into a `ValueError`.

    Args:
        text (str): The plaintext bibkey to parse.

    Returns:
        BibKeyAttr: The successfully parsed bibkey.

    Raises:
        ValueError: If `parse_bibkey` returns an `Err` for `text`.
    """

    bibkey_parsed = parse_bibkey(text)

    if isinstance(bibkey_parsed, Err):
        raise ValueError(f"Could not hard parse '{text}' as bibkey: {bibkey_parsed.message}")

    return bibkey_parsed.out
@@ -1,5 +1,7 @@
1
- from typing import Tuple, TypedDict, Unpack
1
+ from typing import Tuple, TypedDict, Unpack, Literal
2
+ from philoch_bib_sdk.logic.models import BibItem, PageAttr, KeywordsAttr, BibItemDateAttr, BibKeyAttr, Keyword
2
3
 
4
+ from philoch_bib_sdk.logic.literals import TBasicPubState, TBibTeXEntryType, TEpoch, TLanguageID, TPubState
3
5
  from philoch_bib_sdk.logic.models import (
4
6
  Author,
5
7
  BaseNamedRenderable,
@@ -7,6 +9,7 @@ from philoch_bib_sdk.logic.models import (
7
9
  BibItem,
8
10
  BibStringAttr,
9
11
  Journal,
12
+ Keyword,
10
13
  )
11
14
 
12
15
 
@@ -118,3 +121,195 @@ def default_journal(**kwargs: Unpack[JournalArgs]) -> Journal | None:
118
121
  issn_electronic=kwargs.get("issn_electronic", ""),
119
122
  id=kwargs.get("id", None),
120
123
  )
124
+
125
+
126
+ ############
127
+ # Support Args
128
+ ############
129
+
130
+
131
class PageArgs(TypedDict, total=False):
    """Optional keyword arguments accepted by `default_page`."""

    # First page of the range.
    start: str
    # Last page of the range.
    end: str
134
+
135
+
136
def default_page(**kwargs: Unpack[PageArgs]) -> PageAttr:
    """Build a PageAttr, using an empty string for any bound that is not given."""
    first_page = kwargs.get("start", "")
    last_page = kwargs.get("end", "")
    return PageAttr(start=first_page, end=last_page)
141
+
142
+
143
class KeywordsArgs(TypedDict, total=False):
    """Optional keyword arguments accepted by `default_keywords`; one keyword name per level."""

    level_1: str
    level_2: str
    level_3: str
147
+
148
+
149
def default_keywords(**kwargs: Unpack[KeywordsArgs]) -> KeywordsAttr:
    """Build a KeywordsAttr; any level that is not given becomes a Keyword with an empty name."""
    first = Keyword(name=kwargs.get("level_1", ""))
    second = Keyword(name=kwargs.get("level_2", ""))
    third = Keyword(name=kwargs.get("level_3", ""))
    return KeywordsAttr(level_1=first, level_2=second, level_3=third)
155
+
156
+
157
+ class BibItemDateArgs(TypedDict, total=False):
158
+ year: int
159
+ year_part_2_hyphen: int | None
160
+ year_part_2_slash: int | None
161
+ month: int | None
162
+ day: int | None
163
+
164
+
165
def default_bib_item_date(**kwargs: Unpack[BibItemDateArgs]) -> BibItemDateAttr:
    """Build a BibItemDateAttr; a missing year defaults to 0, every other missing part to None."""
    year = kwargs.get("year", 0)
    second_year_hyphen = kwargs.get("year_part_2_hyphen")
    second_year_slash = kwargs.get("year_part_2_slash")
    month = kwargs.get("month")
    day = kwargs.get("day")

    return BibItemDateAttr(
        year=year,
        year_part_2_hyphen=second_year_hyphen,
        year_part_2_slash=second_year_slash,
        month=month,
        day=day,
    )
173
+
174
+
175
def parse_date(date: BibItemDateArgs | Literal["no date"]) -> BibItemDateAttr | Literal["no date"]:
    """Turn date arguments into a BibItemDateAttr; anything that is not a dict yields "no date"."""
    if not isinstance(date, dict):
        return "no date"

    return default_bib_item_date(**date)
180
+
181
+
182
class BibKeyArgs(TypedDict, total=False):
    """Optional keyword arguments accepted by `default_bib_key`."""

    first_author: str
    other_authors: str
    # Either a year or a basic publication state.
    date: int | TBasicPubState
    date_suffix: str
187
+
188
+
189
def default_bib_key(**kwargs: Unpack[BibKeyArgs]) -> BibKeyAttr:
    """Build a BibKeyAttr, using an empty string for every component that is not given."""
    first_author = kwargs.get("first_author", "")
    other_authors = kwargs.get("other_authors", "")
    date = kwargs.get("date", "")
    date_suffix = kwargs.get("date_suffix", "")

    return BibKeyAttr(
        first_author=first_author,
        other_authors=other_authors,
        date=date,
        date_suffix=date_suffix,
    )
197
+
198
+
199
+ ############
200
+ # BibItem Args
201
+ ############
202
+
203
+
204
class BibItemArgs(TypedDict, total=False):
    """
    Optional keyword arguments accepted by `default_bib_item`.

    Keys written with a leading underscore are passed to the BibItem field of
    the same name without the underscore (e.g. `_kws` -> `kws`).
    """

    # Maintenance notes
    _to_do_general: str
    _change_request: str

    # Identification, contributors and dating
    entry_type: TBibTeXEntryType
    bibkey: BibKeyArgs
    author: Tuple[AuthorArgs, ...]
    editor: Tuple[AuthorArgs, ...]
    options: Tuple[str, ...]
    date: BibItemDateArgs | Literal["no date"]
    pubstate: TPubState

    # Titles and container
    title: BibStringArgs
    booktitle: BibStringArgs
    # crossref: dict
    journal: JournalArgs
    volume: str
    number: str
    pages: Tuple[PageArgs, ...]
    eid: str
    series: BaseNamedRenderableArgs

    # Publication details
    address: BibStringArgs
    institution: BibStringArgs
    school: BibStringArgs
    publisher: BibStringArgs
    type: BibStringArgs
    edition: int
    note: BibStringArgs
    issuetitle: BibStringArgs
    _guesteditor: Tuple[AuthorArgs, ...]
    _extra_note: BibStringArgs

    # External identifiers
    urn: str
    eprint: str
    doi: str
    url: str

    # Classification and cross-references
    _kws: KeywordsArgs
    _epoch: TEpoch
    _person: AuthorArgs
    _comm_for_profile_bib: str
    _langid: TLanguageID
    _lang_der: str
    _further_refs: Tuple[BibKeyArgs, ...]
    _depends_on: Tuple[BibKeyArgs, ...]

    # Project-internal bookkeeping
    _dltc_num: int
    _spec_interest: str
    _note_perso: str
    _note_stock: str
    _note_status: str
    _num_inwork_coll: int
    _num_inwork: str
    _num_coll: int
    _dltc_copyediting_note: str
    _note_missing: str
    _num_sort: int
    id: int
    _bib_info_source: str
258
+
259
+
260
def default_bib_item(**kwargs: Unpack[BibItemArgs]) -> BibItem:
    """
    Build a BibItem from plain keyword arguments, filling in defaults for everything omitted.

    Nested argument dicts are converted with the matching `default_*` builder
    when their key is present. Omitted structured fields default to an empty
    string (`journal` to None), omitted tuple fields to an empty tuple, and
    omitted integer fields (`edition`, `id`, the `_num_*` / `_dltc_num` keys) to None.
    """
    return BibItem(
        # Maintenance notes
        to_do_general=kwargs.get("_to_do_general", ""),
        change_request=kwargs.get("_change_request", ""),
        # Identification, contributors and dating
        entry_type=kwargs.get("entry_type", "UNKNOWN"),
        bibkey=default_bib_key(**kwargs["bibkey"]) if "bibkey" in kwargs else "",
        author=tuple(default_author(**author_args) for author_args in kwargs.get("author", ())),
        editor=tuple(default_author(**editor_args) for editor_args in kwargs.get("editor", ())),
        options=kwargs.get("options", ()),
        date=parse_date(kwargs.get("date", "no date")),
        pubstate=kwargs.get("pubstate", ""),
        # Titles and container
        title=default_bib_string(**kwargs["title"]) if "title" in kwargs else "",
        booktitle=default_bib_string(**kwargs["booktitle"]) if "booktitle" in kwargs else "",
        crossref="",  # crossref cannot be passed as an argument here, so it is always left empty
        journal=default_journal(**kwargs["journal"]) if "journal" in kwargs else None,
        volume=kwargs.get("volume", ""),
        number=kwargs.get("number", ""),
        pages=tuple(default_page(**page_args) for page_args in kwargs.get("pages", ())),
        eid=kwargs.get("eid", ""),
        series=default_base_named_renderable(**kwargs["series"]) if "series" in kwargs else "",
        # Publication details
        address=default_bib_string(**kwargs["address"]) if "address" in kwargs else "",
        institution=default_bib_string(**kwargs["institution"]) if "institution" in kwargs else "",
        school=default_bib_string(**kwargs["school"]) if "school" in kwargs else "",
        publisher=default_bib_string(**kwargs["publisher"]) if "publisher" in kwargs else "",
        type=default_bib_string(**kwargs["type"]) if "type" in kwargs else "",
        edition=kwargs.get("edition"),
        note=default_bib_string(**kwargs["note"]) if "note" in kwargs else "",
        issuetitle=default_bib_string(**kwargs["issuetitle"]) if "issuetitle" in kwargs else "",
        guesteditor=tuple(default_author(**guest_args) for guest_args in kwargs.get("_guesteditor", ())),
        extra_note=default_bib_string(**kwargs["_extra_note"]) if "_extra_note" in kwargs else "",
        # External identifiers
        urn=kwargs.get("urn", ""),
        eprint=kwargs.get("eprint", ""),
        doi=kwargs.get("doi", ""),
        url=kwargs.get("url", ""),
        # Classification and cross-references
        kws=default_keywords(**kwargs["_kws"]) if "_kws" in kwargs else "",
        epoch=kwargs.get("_epoch", ""),
        person=default_author(**kwargs["_person"]) if "_person" in kwargs else "",
        comm_for_profile_bib=kwargs.get("_comm_for_profile_bib", ""),
        langid=kwargs.get("_langid", ""),
        lang_der=kwargs.get("_lang_der", ""),
        further_refs=tuple(default_bib_key(**ref_args) for ref_args in kwargs.get("_further_refs", ())),
        depends_on=tuple(default_bib_key(**dep_args) for dep_args in kwargs.get("_depends_on", ())),
        # Project-internal bookkeeping
        dltc_num=kwargs.get("_dltc_num"),
        spec_interest=kwargs.get("_spec_interest", ""),
        note_perso=kwargs.get("_note_perso", ""),
        note_stock=kwargs.get("_note_stock", ""),
        note_status=kwargs.get("_note_status", ""),
        num_inwork_coll=kwargs.get("_num_inwork_coll"),
        num_inwork=kwargs.get("_num_inwork", ""),
        num_coll=kwargs.get("_num_coll"),
        dltc_copyediting_note=kwargs.get("_dltc_copyediting_note", ""),
        note_missing=kwargs.get("_note_missing", ""),
        num_sort=kwargs.get("_num_sort"),
        id=kwargs.get("id"),
        bib_info_source=kwargs.get("_bib_info_source", ""),
    )
@@ -0,0 +1,42 @@
1
+ from typing import Callable, Dict, Tuple
2
+ from philoch_bib_sdk.converters.plaintext.journal.formatter import format_journal
3
+ from philoch_bib_sdk.logic.models import BibItem, BibKeyAttr
4
+
5
+
6
+ type TJournalName = str
7
+
8
+ type TVolume = str
9
+
10
+ type TNumber = str
11
+
12
+ type TBibkey = str
13
+
14
+
15
+ type TJournalBibkeyIndex = Dict[
16
+ Tuple[TJournalName, TVolume, TNumber], BibKeyAttr
17
+ ] # (journal, volume, number) # bibkey
18
+
19
+
20
def get_bibkey_by_journal_volume_number(index: TJournalBibkeyIndex, subject: BibItem) -> BibKeyAttr:
    """
    Simple lookup of a BibItem on an index for its bibkey, via the combination (journal_name, volume, number).

    The journal name used for the lookup is the subject's journal formatted with
    `bibstring_type="latex"`; volume and number are taken from the subject as they are.

    Args:
        index (TJournalBibkeyIndex): Mapping from (journal, volume, number) to bibkey.
        subject (BibItem): The item whose bibkey is looked up.

    Returns:
        BibKeyAttr: The bibkey stored in the index for the subject's combination.

    Raises:
        ValueError: If the formatted journal, the volume, or the number is an empty string.
        KeyError: If the combination is not present in the index.
    """

    journal = format_journal(subject.journal, bibstring_type="latex")
    volume = subject.volume
    number = subject.number

    if "" in (journal, volume, number):
        raise ValueError(
            f"Expected subject bibitem journal with non-empty journal, volume, and number. Found [[ journal: {journal}; volume: {volume}; number: {number} ]] instead."
        )

    try:
        return index[(journal, volume, number)]
    except KeyError as err:
        # Same exception type as a plain lookup, but say which combination was missing.
        raise KeyError(
            f"No bibkey found in the index for [[ journal: {journal}; volume: {volume}; number: {number} ]]."
        ) from err
35
+
36
+
37
+ type TReadIndex = Callable[
38
+ [
39
+ str, # path to the index file
40
+ ],
41
+ TJournalBibkeyIndex,
42
+ ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: philoch-bib-sdk
3
- Version: 0.1.3
3
+ Version: 0.1.5
4
4
  Summary: Standard development kit for the Philosophie Bibliography project
5
5
  License: MIT
6
6
  Author: Luis Alejandro Bordo García
@@ -13,6 +13,7 @@ Classifier: Programming Language :: Python :: 3
13
13
  Classifier: Programming Language :: Python :: 3.13
14
14
  Requires-Dist: aletk (>=0.1.6,<0.2.0)
15
15
  Requires-Dist: attrs (>=25.3.0,<26.0.0)
16
+ Requires-Dist: polars (>=1.32.3,<2.0.0)
16
17
  Description-Content-Type: text/markdown
17
18
 
18
19
  # Philosophie.ch Bibliography SDK
@@ -1,10 +1,11 @@
1
1
  philoch_bib_sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ philoch_bib_sdk/adapters/tabular_data/read_journal_volume_number_index.py,sha256=6ocGGl9nZXqyHkPxJp5GH1NIfb-BUMTYqr84QwZOPd8,2210
2
3
  philoch_bib_sdk/converters/latex.py,sha256=LuAKLrClECuBeaDQYJc7tIJECEV4h0kt0VE_ssv3s0o,236
3
4
  philoch_bib_sdk/converters/plaintext/author/formatter.py,sha256=hsqKUyNhIZeqisnEQU43DokAfEfG78rgQ8POTjYnToM,965
4
5
  philoch_bib_sdk/converters/plaintext/author/parser.py,sha256=LL12mtgN59eJCv551c6s7YfMTjfJAJqm-jRQkdntmIg,2514
5
6
  philoch_bib_sdk/converters/plaintext/bib_string_formatter.py,sha256=5Z97u5GryHUgZcPhWE41thgWCB4wYu22pZ9et6nakmw,329
6
7
  philoch_bib_sdk/converters/plaintext/bibitem/bibkey_formatter.py,sha256=YivsY0gblKJdC4yKYZ3tvWmKIvFXW4iNht9zhz8oFUs,565
7
- philoch_bib_sdk/converters/plaintext/bibitem/bibkey_parser.py,sha256=lmcDD-NN0hiAF_uKRUcucTBELd_F9q_qjbW9Df7srwY,4624
8
+ philoch_bib_sdk/converters/plaintext/bibitem/bibkey_parser.py,sha256=TKHFQ9QeZ2Jgm3sFCjTqz_PDfous0amvz3DB0AJA51E,4991
8
9
  philoch_bib_sdk/converters/plaintext/bibitem/date_formatter.py,sha256=G2mbaJidDg8avKBbro1rVcEznPC92XVTDQ4fSdmvhJo,1480
9
10
  philoch_bib_sdk/converters/plaintext/bibitem/date_parser.py,sha256=3ZYGhhGqILzrvnwOvG4NPAjErLwVva0dfsN0B9eFomg,2242
10
11
  philoch_bib_sdk/converters/plaintext/bibitem/formatter.py,sha256=EjSwHYAPn0YRjeLGK_rCi26Wtug6X5x5DFEKPjStn30,6298
@@ -14,13 +15,14 @@ philoch_bib_sdk/converters/plaintext/bibitem/parser.py,sha256=B8xF3OKr6R4-FwsV8i
14
15
  philoch_bib_sdk/converters/plaintext/journal/formatter.py,sha256=o5ikU-aNFr6cxgzD0rBCjymHLpGrD6RGvNE8V2sX52s,599
15
16
  philoch_bib_sdk/converters/plaintext/journal/parser.py,sha256=kT1YHwc9Am82WHRhaSWXaCeKitPn9QLWIbmIe8T1of4,1092
16
17
  philoch_bib_sdk/converters/plaintext/shared/renderable_formatter.py,sha256=oS5u8RJpkRXaDTmauVqZi-uuXsyG-UQZMK2pgzSk-qo,686
17
- philoch_bib_sdk/logic/default_models.py,sha256=UnPmtPyNfqkKaTuhWLi5hiP1FCPJRnLXqsjH1gYrQic,3368
18
+ philoch_bib_sdk/logic/default_models.py,sha256=cHHKSFmNR29qBxQkPwelQ09sx66isHlAIr1PiIHAvH4,10467
18
19
  philoch_bib_sdk/logic/functions/comparator.py,sha256=4G5EUEVf8v6URt1v1Fqk1pjqni6fxUs_Goh4EQ4RBJY,4034
20
+ philoch_bib_sdk/logic/functions/journal_article_matcher.py,sha256=IcnTv07Gk68M1LGB0Y9Z2DZsLYsWRzJCeL_c1r29bqg,1219
19
21
  philoch_bib_sdk/logic/literals.py,sha256=_9poyFdSqbMWNg686xaexvkZTIrpCbQqPNTCVi0PlFc,1573
20
22
  philoch_bib_sdk/logic/models.py,sha256=xHCQWFq_rEcX967icALD4oOQjM8AlLKLzXQ8SP-YNis,8681
21
23
  philoch_bib_sdk/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
- philoch_bib_sdk-0.1.3.dist-info/LICENSE,sha256=nplGobji9gkYmJxDBbBz2SKjZY27SUaqhqKkpUB-C30,1070
23
- philoch_bib_sdk-0.1.3.dist-info/METADATA,sha256=o2nbm6-lqZ_1hrs9SBLFLWircDKtyWRKvhuYoNFF2mI,776
24
- philoch_bib_sdk-0.1.3.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
25
- philoch_bib_sdk-0.1.3.dist-info/entry_points.txt,sha256=5PDcoKK00cdaL0CabioRUz08ZJeXLa94Ca-C0umGPTU,46
26
- philoch_bib_sdk-0.1.3.dist-info/RECORD,,
24
+ philoch_bib_sdk-0.1.5.dist-info/LICENSE,sha256=nplGobji9gkYmJxDBbBz2SKjZY27SUaqhqKkpUB-C30,1070
25
+ philoch_bib_sdk-0.1.5.dist-info/METADATA,sha256=wGBNDCmurQsZbrDFiO9PrTHUfp0B3dLeLOLre7Io6Ns,816
26
+ philoch_bib_sdk-0.1.5.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
27
+ philoch_bib_sdk-0.1.5.dist-info/entry_points.txt,sha256=5PDcoKK00cdaL0CabioRUz08ZJeXLa94Ca-C0umGPTU,46
28
+ philoch_bib_sdk-0.1.5.dist-info/RECORD,,