philoch-bib-sdk 0.3.9__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. philoch_bib_sdk/__init__.py +0 -0
  2. philoch_bib_sdk/_rust.cp313-win_amd64.pyd +0 -0
  3. philoch_bib_sdk/adapters/io/__init__.py +115 -0
  4. philoch_bib_sdk/adapters/io/csv/__init__.py +308 -0
  5. philoch_bib_sdk/adapters/io/ods/__init__.py +145 -0
  6. philoch_bib_sdk/adapters/plaintext/bibitem_reader.py +0 -0
  7. philoch_bib_sdk/adapters/tabular_data/read_journal_volume_number_index.py +58 -0
  8. philoch_bib_sdk/converters/latex.py +6 -0
  9. philoch_bib_sdk/converters/plaintext/author/formatter.py +34 -0
  10. philoch_bib_sdk/converters/plaintext/author/parser.py +83 -0
  11. philoch_bib_sdk/converters/plaintext/bib_string_formatter.py +8 -0
  12. philoch_bib_sdk/converters/plaintext/bibitem/bibkey_formatter.py +21 -0
  13. philoch_bib_sdk/converters/plaintext/bibitem/bibkey_parser.py +158 -0
  14. philoch_bib_sdk/converters/plaintext/bibitem/date_formatter.py +37 -0
  15. philoch_bib_sdk/converters/plaintext/bibitem/date_parser.py +62 -0
  16. philoch_bib_sdk/converters/plaintext/bibitem/formatter.py +182 -0
  17. philoch_bib_sdk/converters/plaintext/bibitem/pages_formatter.py +13 -0
  18. philoch_bib_sdk/converters/plaintext/bibitem/pages_parser.py +63 -0
  19. philoch_bib_sdk/converters/plaintext/bibitem/parser.py +415 -0
  20. philoch_bib_sdk/converters/plaintext/journal/formatter.py +25 -0
  21. philoch_bib_sdk/converters/plaintext/journal/parser.py +36 -0
  22. philoch_bib_sdk/converters/plaintext/shared/renderable_formatter.py +25 -0
  23. philoch_bib_sdk/interfaces/cli/__init__.py +3 -0
  24. philoch_bib_sdk/interfaces/cli/fuzzy_matching.py +135 -0
  25. philoch_bib_sdk/logic/__init__.py +39 -0
  26. philoch_bib_sdk/logic/default_models.py +315 -0
  27. philoch_bib_sdk/logic/functions/__init__.py +31 -0
  28. philoch_bib_sdk/logic/functions/comparator.py +414 -0
  29. philoch_bib_sdk/logic/functions/fuzzy_matcher.py +796 -0
  30. philoch_bib_sdk/logic/functions/journal_article_matcher.py +44 -0
  31. philoch_bib_sdk/logic/literals.py +98 -0
  32. philoch_bib_sdk/logic/models.py +366 -0
  33. philoch_bib_sdk/logic/models_staging.py +173 -0
  34. philoch_bib_sdk/procedures/fuzzy_matching.py +112 -0
  35. philoch_bib_sdk/py.typed +0 -0
  36. philoch_bib_sdk/rust_scorer/Cargo.lock +232 -0
  37. philoch_bib_sdk/rust_scorer/Cargo.toml +26 -0
  38. philoch_bib_sdk/rust_scorer/pyproject.toml +15 -0
  39. philoch_bib_sdk/rust_scorer/rust_scorer.pyi +65 -0
  40. philoch_bib_sdk/rust_scorer/src/lib.rs +362 -0
  41. philoch_bib_sdk-0.3.9.dist-info/METADATA +15 -0
  42. philoch_bib_sdk-0.3.9.dist-info/RECORD +44 -0
  43. philoch_bib_sdk-0.3.9.dist-info/WHEEL +4 -0
  44. philoch_bib_sdk-0.3.9.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,315 @@
1
+ from typing import Tuple, TypedDict, Unpack, Literal
2
+ from philoch_bib_sdk.logic.models import BibItem, PageAttr, KeywordsAttr, BibItemDateAttr, BibKeyAttr, Keyword
3
+
4
+ from philoch_bib_sdk.logic.literals import TBasicPubState, TBibTeXEntryType, TEpoch, TLanguageID, TPubState
5
+ from philoch_bib_sdk.logic.models import (
6
+ Author,
7
+ BaseNamedRenderable,
8
+ BaseRenderable,
9
+ BibItem,
10
+ BibStringAttr,
11
+ Journal,
12
+ Keyword,
13
+ )
14
+
15
+
16
class BibStringArgs(TypedDict, total=False):
    """Keyword arguments accepted by ``default_bib_string``.

    ``total=False`` makes every key optional; ``default_bib_string`` fills
    any missing key with an empty string.
    """

    latex: str
    unicode: str
    simplified: str
20
+
21
+
22
def default_bib_string(**kwargs: Unpack[BibStringArgs]) -> BibStringAttr:
    """
    Build a BibStringAttr from any subset of its attributes.

    Each of ``latex``, ``unicode`` and ``simplified`` falls back to an empty
    string when it is not supplied.
    """
    latex = kwargs.get("latex", "")
    unicode = kwargs.get("unicode", "")
    simplified = kwargs.get("simplified", "")
    return BibStringAttr(latex=latex, unicode=unicode, simplified=simplified)
31
+
32
+
33
+ ############
34
+ # Base Renderables
35
+ ############
36
+
37
+
38
class BaseRenderableArgs(TypedDict, total=False):
    """Keyword arguments accepted by ``default_base_renderable``.

    ``total=False`` makes every key optional.
    """

    # Nested arguments, expanded through ``default_bib_string``.
    text: BibStringArgs
    # Falls back to None when omitted.
    id: int | None
41
+
42
+
43
def default_base_renderable(**kwargs: Unpack[BaseRenderableArgs]) -> BaseRenderable:
    """
    Build a BaseRenderable from any subset of its attributes.

    A missing ``text`` becomes an all-empty bib string; a missing ``id``
    becomes None.
    """
    text = default_bib_string(**kwargs.get("text", {}))
    identifier = kwargs.get("id")
    return BaseRenderable(text=text, id=identifier)
51
+
52
+
53
class BaseNamedRenderableArgs(TypedDict, total=False):
    """Keyword arguments accepted by ``default_base_named_renderable``.

    ``total=False`` makes every key optional.
    """

    # Nested arguments, expanded through ``default_bib_string``.
    name: BibStringArgs
    # Falls back to None when omitted.
    id: int | None
56
+
57
+
58
def default_base_named_renderable(**kwargs: Unpack[BaseNamedRenderableArgs]) -> BaseNamedRenderable:
    """
    Build a BaseNamedRenderable from any subset of its attributes.

    A missing ``name`` becomes an all-empty bib string; a missing ``id``
    becomes None.
    """
    name = default_bib_string(**kwargs.get("name", {}))
    identifier = kwargs.get("id")
    return BaseNamedRenderable(name=name, id=identifier)
66
+
67
+
68
+ ############
69
+ # Author
70
+ ############
71
+
72
+
73
class AuthorArgs(TypedDict, total=False):
    """Keyword arguments accepted by ``default_author``.

    ``total=False`` makes every key optional.
    """

    # The five name parts are nested arguments, each expanded through
    # ``default_bib_string``.
    given_name: BibStringArgs
    family_name: BibStringArgs
    mononym: BibStringArgs
    shorthand: BibStringArgs
    famous_name: BibStringArgs
    # Passed through unchanged; falls back to an empty tuple when omitted.
    publications: Tuple[BibItem, ...]
    # Falls back to None when omitted.
    id: int | None
81
+
82
+
83
def default_author(**kwargs: Unpack[AuthorArgs]) -> Author:
    """
    Build an Author from any subset of its attributes.

    Missing name parts become all-empty bib strings, a missing
    ``publications`` becomes an empty tuple, and a missing ``id`` becomes None.
    """
    given_name = default_bib_string(**kwargs.get("given_name", {}))
    family_name = default_bib_string(**kwargs.get("family_name", {}))
    mononym = default_bib_string(**kwargs.get("mononym", {}))
    shorthand = default_bib_string(**kwargs.get("shorthand", {}))
    famous_name = default_bib_string(**kwargs.get("famous_name", {}))
    publications = kwargs.get("publications", ())
    identifier = kwargs.get("id")

    return Author(
        given_name=given_name,
        family_name=family_name,
        mononym=mononym,
        shorthand=shorthand,
        famous_name=famous_name,
        publications=publications,
        id=identifier,
    )
97
+
98
+
99
+ ############
100
+ # Journal
101
+ ############
102
+
103
+
104
class JournalArgs(TypedDict, total=False):
    """Keyword arguments accepted by ``default_journal``.

    ``total=False`` makes every key optional.
    """

    # Nested arguments, expanded through ``default_bib_string``.
    name: BibStringArgs
    # Both ISSN fields fall back to an empty string when omitted.
    issn_print: str
    issn_electronic: str
    # Falls back to None when omitted.
    id: int | None
109
+
110
+
111
def default_journal(**kwargs: Unpack[JournalArgs]) -> Journal | None:
    """
    Build a Journal from any subset of its attributes.

    Returns None when called without any keyword argument. Otherwise a
    missing ``name`` becomes an all-empty bib string, missing ISSN fields
    become empty strings, and a missing ``id`` becomes None.
    """
    # No arguments at all means "no journal", not "an empty journal".
    if not kwargs:
        return None

    name = default_bib_string(**kwargs.get("name", {}))
    issn_print = kwargs.get("issn_print", "")
    issn_electronic = kwargs.get("issn_electronic", "")
    identifier = kwargs.get("id")

    return Journal(
        name=name,
        issn_print=issn_print,
        issn_electronic=issn_electronic,
        id=identifier,
    )
124
+
125
+
126
+ ############
127
+ # Support Args
128
+ ############
129
+
130
+
131
class PageArgs(TypedDict, total=False):
    """Keyword arguments accepted by ``default_page``.

    ``total=False`` makes every key optional; ``default_page`` fills any
    missing key with an empty string.
    """

    start: str
    end: str
134
+
135
+
136
def default_page(**kwargs: Unpack[PageArgs]) -> PageAttr:
    """
    Build a PageAttr from any subset of its attributes.

    ``start`` and ``end`` each fall back to an empty string when not supplied.
    """
    start = kwargs.get("start", "")
    end = kwargs.get("end", "")
    return PageAttr(start=start, end=end)
141
+
142
+
143
class KeywordsArgs(TypedDict, total=False):
    """Keyword arguments accepted by ``default_keywords``.

    ``total=False`` makes every key optional. Each value is a plain string
    that ``default_keywords`` wraps as ``Keyword(name=...)``, using an empty
    string when the key is missing.
    """

    level_1: str
    level_2: str
    level_3: str
147
+
148
+
149
def default_keywords(**kwargs: Unpack[KeywordsArgs]) -> KeywordsAttr:
    """
    Build a KeywordsAttr from any subset of its three levels.

    Each level is wrapped in a Keyword whose name is the supplied string, or
    an empty string when that level is not supplied.
    """
    level_1 = Keyword(name=kwargs.get("level_1", ""))
    level_2 = Keyword(name=kwargs.get("level_2", ""))
    level_3 = Keyword(name=kwargs.get("level_3", ""))
    return KeywordsAttr(level_1=level_1, level_2=level_2, level_3=level_3)
155
+
156
+
157
class BibItemDateArgs(TypedDict, total=False):
    """Keyword arguments accepted by ``default_bib_item_date``.

    ``total=False`` makes every key optional.
    """

    # Falls back to 0 when omitted.
    year: int
    # The remaining fields fall back to None when omitted.
    year_part_2_hyphen: int | None
    year_part_2_slash: int | None
    month: int | None
    day: int | None
163
+
164
+
165
def default_bib_item_date(**kwargs: Unpack[BibItemDateArgs]) -> BibItemDateAttr:
    """
    Build a BibItemDateAttr from any subset of its attributes.

    ``year`` falls back to 0; every other field falls back to None.
    """
    year = kwargs.get("year", 0)
    year_part_2_hyphen = kwargs.get("year_part_2_hyphen")
    year_part_2_slash = kwargs.get("year_part_2_slash")
    month = kwargs.get("month")
    day = kwargs.get("day")

    return BibItemDateAttr(
        year=year,
        year_part_2_hyphen=year_part_2_hyphen,
        year_part_2_slash=year_part_2_slash,
        month=month,
        day=day,
    )
173
+
174
+
175
def parse_date(date: BibItemDateArgs | Literal["no date"]) -> BibItemDateAttr | Literal["no date"]:
    """
    Turn date arguments into a BibItemDateAttr, or pass through "no date".

    A dict is expanded through ``default_bib_item_date``; any other value
    yields the literal "no date".
    """
    if not isinstance(date, dict):
        return "no date"
    return default_bib_item_date(**date)
180
+
181
+
182
class BibKeyArgs(TypedDict, total=False):
    """Keyword arguments accepted by ``default_bib_key``.

    ``total=False`` makes every key optional; ``default_bib_key`` fills any
    missing key with an empty string.
    """

    first_author: str
    other_authors: str
    # Either an integer or one of the TBasicPubState values.
    date: int | TBasicPubState
    date_suffix: str
187
+
188
+
189
def default_bib_key(**kwargs: Unpack[BibKeyArgs]) -> BibKeyAttr:
    """
    Build a BibKeyAttr from any subset of its attributes.

    Every field falls back to an empty string when not supplied.
    """
    first_author = kwargs.get("first_author", "")
    other_authors = kwargs.get("other_authors", "")
    date = kwargs.get("date", "")
    date_suffix = kwargs.get("date_suffix", "")

    return BibKeyAttr(
        first_author=first_author,
        other_authors=other_authors,
        date=date,
        date_suffix=date_suffix,
    )
197
+
198
+
199
+ ############
200
+ # BibItem Args
201
+ ############
202
+
203
+
204
class BibItemArgs(TypedDict, total=False):
    """Keyword arguments accepted by ``default_bib_item``.

    ``total=False`` makes every key optional. Keys spelled with a leading
    underscore are passed to ``BibItem`` under the same name without the
    underscore (e.g. ``_kws`` -> ``kws``).
    """

    _to_do_general: str
    _change_request: str
    entry_type: TBibTeXEntryType
    bibkey: BibKeyArgs
    author: Tuple[AuthorArgs, ...]
    editor: Tuple[AuthorArgs, ...]
    options: Tuple[str, ...]
    # Either nested date arguments or the literal "no date".
    date: BibItemDateArgs | Literal["no date"]
    pubstate: TPubState
    title: BibStringArgs
    booktitle: BibStringArgs
    # crossref: dict
    # (not accepted here; ``default_bib_item`` always passes crossref="")
    journal: JournalArgs
    volume: str
    number: str
    pages: Tuple[PageArgs, ...]
    eid: str
    series: BaseNamedRenderableArgs
    address: BibStringArgs
    institution: BibStringArgs
    school: BibStringArgs
    publisher: BibStringArgs
    type: BibStringArgs
    edition: int
    note: BibStringArgs
    issuetitle: BibStringArgs
    _guesteditor: Tuple[AuthorArgs, ...]
    _extra_note: BibStringArgs
    urn: str
    eprint: str
    doi: str
    url: str
    _kws: KeywordsArgs
    _epoch: TEpoch
    _person: AuthorArgs
    _comm_for_profile_bib: str
    _langid: TLanguageID
    _lang_der: str
    _further_refs: Tuple[BibKeyArgs, ...]
    _depends_on: Tuple[BibKeyArgs, ...]
    _dltc_num: int
    _spec_interest: str
    _note_perso: str
    _note_stock: str
    _note_status: str
    _num_inwork_coll: int
    _num_inwork: str
    _num_coll: int
    _dltc_copyediting_note: str
    _note_missing: str
    _num_sort: int
    id: int
    _bib_info_source: str
258
+
259
+
260
def default_bib_item(**kwargs: Unpack[BibItemArgs]) -> BibItem:
    """
    Build a BibItem from any subset of its attributes.

    Nested argument dicts are expanded through the matching ``default_*``
    builder. When a nested key is absent the field is set to an empty string
    (``journal`` is set to None instead). Plain string fields fall back to an
    empty string, tuple fields to an empty tuple, and the numeric fields
    (``edition``, ``dltc_num``, ``num_inwork_coll``, ``num_coll``,
    ``num_sort``, ``id``) to None. ``entry_type`` falls back to "UNKNOWN" and
    ``date`` to "no date".
    """
    return BibItem(
        to_do_general=kwargs.get("_to_do_general", ""),
        change_request=kwargs.get("_change_request", ""),
        entry_type=kwargs.get("entry_type", "UNKNOWN"),
        bibkey=default_bib_key(**kwargs["bibkey"]) if "bibkey" in kwargs else "",
        author=tuple(default_author(**author_args) for author_args in kwargs.get("author", ())),
        editor=tuple(default_author(**editor_args) for editor_args in kwargs.get("editor", ())),
        options=kwargs.get("options", ()),
        date=parse_date(kwargs.get("date", "no date")),
        pubstate=kwargs.get("pubstate", ""),
        title=default_bib_string(**kwargs["title"]) if "title" in kwargs else "",
        booktitle=default_bib_string(**kwargs["booktitle"]) if "booktitle" in kwargs else "",
        # BibItemArgs has no crossref key, so the field is always left empty.
        crossref="",
        journal=default_journal(**kwargs["journal"]) if "journal" in kwargs else None,
        volume=kwargs.get("volume", ""),
        number=kwargs.get("number", ""),
        pages=tuple(default_page(**page_args) for page_args in kwargs.get("pages", ())),
        eid=kwargs.get("eid", ""),
        series=default_base_named_renderable(**kwargs["series"]) if "series" in kwargs else "",
        address=default_bib_string(**kwargs["address"]) if "address" in kwargs else "",
        institution=default_bib_string(**kwargs["institution"]) if "institution" in kwargs else "",
        school=default_bib_string(**kwargs["school"]) if "school" in kwargs else "",
        publisher=default_bib_string(**kwargs["publisher"]) if "publisher" in kwargs else "",
        type=default_bib_string(**kwargs["type"]) if "type" in kwargs else "",
        edition=kwargs.get("edition"),
        note=default_bib_string(**kwargs["note"]) if "note" in kwargs else "",
        issuetitle=default_bib_string(**kwargs["issuetitle"]) if "issuetitle" in kwargs else "",
        guesteditor=tuple(default_author(**guest_args) for guest_args in kwargs.get("_guesteditor", ())),
        extra_note=default_bib_string(**kwargs["_extra_note"]) if "_extra_note" in kwargs else "",
        urn=kwargs.get("urn", ""),
        eprint=kwargs.get("eprint", ""),
        doi=kwargs.get("doi", ""),
        url=kwargs.get("url", ""),
        kws=default_keywords(**kwargs["_kws"]) if "_kws" in kwargs else "",
        epoch=kwargs.get("_epoch", ""),
        person=default_author(**kwargs["_person"]) if "_person" in kwargs else "",
        comm_for_profile_bib=kwargs.get("_comm_for_profile_bib", ""),
        langid=kwargs.get("_langid", ""),
        lang_der=kwargs.get("_lang_der", ""),
        further_refs=tuple(default_bib_key(**key_args) for key_args in kwargs.get("_further_refs", ())),
        depends_on=tuple(default_bib_key(**key_args) for key_args in kwargs.get("_depends_on", ())),
        dltc_num=kwargs.get("_dltc_num"),
        spec_interest=kwargs.get("_spec_interest", ""),
        note_perso=kwargs.get("_note_perso", ""),
        note_stock=kwargs.get("_note_stock", ""),
        note_status=kwargs.get("_note_status", ""),
        num_inwork_coll=kwargs.get("_num_inwork_coll"),
        num_inwork=kwargs.get("_num_inwork", ""),
        num_coll=kwargs.get("_num_coll"),
        dltc_copyediting_note=kwargs.get("_dltc_copyediting_note", ""),
        note_missing=kwargs.get("_note_missing", ""),
        num_sort=kwargs.get("_num_sort"),
        id=kwargs.get("id"),
        bib_info_source=kwargs.get("_bib_info_source", ""),
    )
@@ -0,0 +1,31 @@
1
+ """Logic functions for bibliography processing."""
2
+
3
+ from philoch_bib_sdk.logic.functions.comparator import (
4
+ compare_bibitems,
5
+ compare_bibitems_detailed,
6
+ )
7
+ from philoch_bib_sdk.logic.functions.fuzzy_matcher import (
8
+ BibItemBlockIndex,
9
+ build_index,
10
+ build_index_cached,
11
+ find_similar_bibitems,
12
+ stage_bibitem,
13
+ stage_bibitems_batch,
14
+ _RUST_SCORER_AVAILABLE,
15
+ )
16
+ from philoch_bib_sdk.logic.functions.journal_article_matcher import (
17
+ get_bibkey_by_journal_volume_number,
18
+ )
19
+
20
# Names re-exported by this package, grouped by the submodule they are
# imported from.
__all__ = [
    # comparator
    "compare_bibitems",
    "compare_bibitems_detailed",
    # fuzzy_matcher
    "BibItemBlockIndex",
    "build_index",
    "build_index_cached",
    "find_similar_bibitems",
    "stage_bibitem",
    "stage_bibitems_batch",
    # journal_article_matcher
    "get_bibkey_by_journal_volume_number",
    # fuzzy_matcher (underscore-prefixed flag, re-exported as well)
    "_RUST_SCORER_AVAILABLE",
]