philoch-bib-sdk 0.3.9__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- philoch_bib_sdk/__init__.py +0 -0
- philoch_bib_sdk/_rust.cp313-win_amd64.pyd +0 -0
- philoch_bib_sdk/adapters/io/__init__.py +115 -0
- philoch_bib_sdk/adapters/io/csv/__init__.py +308 -0
- philoch_bib_sdk/adapters/io/ods/__init__.py +145 -0
- philoch_bib_sdk/adapters/plaintext/bibitem_reader.py +0 -0
- philoch_bib_sdk/adapters/tabular_data/read_journal_volume_number_index.py +58 -0
- philoch_bib_sdk/converters/latex.py +6 -0
- philoch_bib_sdk/converters/plaintext/author/formatter.py +34 -0
- philoch_bib_sdk/converters/plaintext/author/parser.py +83 -0
- philoch_bib_sdk/converters/plaintext/bib_string_formatter.py +8 -0
- philoch_bib_sdk/converters/plaintext/bibitem/bibkey_formatter.py +21 -0
- philoch_bib_sdk/converters/plaintext/bibitem/bibkey_parser.py +158 -0
- philoch_bib_sdk/converters/plaintext/bibitem/date_formatter.py +37 -0
- philoch_bib_sdk/converters/plaintext/bibitem/date_parser.py +62 -0
- philoch_bib_sdk/converters/plaintext/bibitem/formatter.py +182 -0
- philoch_bib_sdk/converters/plaintext/bibitem/pages_formatter.py +13 -0
- philoch_bib_sdk/converters/plaintext/bibitem/pages_parser.py +63 -0
- philoch_bib_sdk/converters/plaintext/bibitem/parser.py +415 -0
- philoch_bib_sdk/converters/plaintext/journal/formatter.py +25 -0
- philoch_bib_sdk/converters/plaintext/journal/parser.py +36 -0
- philoch_bib_sdk/converters/plaintext/shared/renderable_formatter.py +25 -0
- philoch_bib_sdk/interfaces/cli/__init__.py +3 -0
- philoch_bib_sdk/interfaces/cli/fuzzy_matching.py +135 -0
- philoch_bib_sdk/logic/__init__.py +39 -0
- philoch_bib_sdk/logic/default_models.py +315 -0
- philoch_bib_sdk/logic/functions/__init__.py +31 -0
- philoch_bib_sdk/logic/functions/comparator.py +414 -0
- philoch_bib_sdk/logic/functions/fuzzy_matcher.py +796 -0
- philoch_bib_sdk/logic/functions/journal_article_matcher.py +44 -0
- philoch_bib_sdk/logic/literals.py +98 -0
- philoch_bib_sdk/logic/models.py +366 -0
- philoch_bib_sdk/logic/models_staging.py +173 -0
- philoch_bib_sdk/procedures/fuzzy_matching.py +112 -0
- philoch_bib_sdk/py.typed +0 -0
- philoch_bib_sdk/rust_scorer/Cargo.lock +232 -0
- philoch_bib_sdk/rust_scorer/Cargo.toml +26 -0
- philoch_bib_sdk/rust_scorer/pyproject.toml +15 -0
- philoch_bib_sdk/rust_scorer/rust_scorer.pyi +65 -0
- philoch_bib_sdk/rust_scorer/src/lib.rs +362 -0
- philoch_bib_sdk-0.3.9.dist-info/METADATA +15 -0
- philoch_bib_sdk-0.3.9.dist-info/RECORD +44 -0
- philoch_bib_sdk-0.3.9.dist-info/WHEEL +4 -0
- philoch_bib_sdk-0.3.9.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
from typing import Tuple, TypedDict, Unpack, Literal
|
|
2
|
+
from philoch_bib_sdk.logic.models import BibItem, PageAttr, KeywordsAttr, BibItemDateAttr, BibKeyAttr, Keyword
|
|
3
|
+
|
|
4
|
+
from philoch_bib_sdk.logic.literals import TBasicPubState, TBibTeXEntryType, TEpoch, TLanguageID, TPubState
|
|
5
|
+
from philoch_bib_sdk.logic.models import (
|
|
6
|
+
Author,
|
|
7
|
+
BaseNamedRenderable,
|
|
8
|
+
BaseRenderable,
|
|
9
|
+
BibItem,
|
|
10
|
+
BibStringAttr,
|
|
11
|
+
Journal,
|
|
12
|
+
Keyword,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class BibStringArgs(TypedDict, total=False):
    """
    Keyword arguments accepted by `default_bib_string`.

    The dict is declared with `total=False`, so every key may be omitted.
    """

    # One entry per keyword that `default_bib_string` forwards to `BibStringAttr`.
    latex: str
    unicode: str
    simplified: str
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def default_bib_string(**kwargs: Unpack[BibStringArgs]) -> BibStringAttr:
    """
    Build a BibStringAttr from any subset of its attributes.

    Each of `latex`, `unicode` and `simplified` falls back to the empty string when it is not supplied.
    """
    latex = kwargs.get("latex", "")
    unicode = kwargs.get("unicode", "")
    simplified = kwargs.get("simplified", "")
    return BibStringAttr(latex=latex, unicode=unicode, simplified=simplified)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
############
|
|
34
|
+
# Base Renderables
|
|
35
|
+
############
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class BaseRenderableArgs(TypedDict, total=False):
    """
    Keyword arguments accepted by `default_base_renderable`.

    The dict is declared with `total=False`, so every key may be omitted.
    """

    # Nested arguments, expanded through `default_bib_string`.
    text: BibStringArgs
    id: int | None
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def default_base_renderable(**kwargs: Unpack[BaseRenderableArgs]) -> BaseRenderable:
    """
    Build a BaseRenderable from any subset of its attributes.

    A missing `text` is built from an empty argument dict (all strings empty); a missing `id` becomes None.
    """
    text = default_bib_string(**kwargs.get("text", {}))
    identifier = kwargs.get("id")
    return BaseRenderable(text=text, id=identifier)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class BaseNamedRenderableArgs(TypedDict, total=False):
    """
    Keyword arguments accepted by `default_base_named_renderable`.

    The dict is declared with `total=False`, so every key may be omitted.
    """

    # Nested arguments, expanded through `default_bib_string`.
    name: BibStringArgs
    id: int | None
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def default_base_named_renderable(**kwargs: Unpack[BaseNamedRenderableArgs]) -> BaseNamedRenderable:
    """
    Build a BaseNamedRenderable from any subset of its attributes.

    A missing `name` is built from an empty argument dict (all strings empty); a missing `id` becomes None.
    """
    name = default_bib_string(**kwargs.get("name", {}))
    identifier = kwargs.get("id")
    return BaseNamedRenderable(name=name, id=identifier)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
############
|
|
69
|
+
# Author
|
|
70
|
+
############
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class AuthorArgs(TypedDict, total=False):
    """
    Keyword arguments accepted by `default_author`.

    The dict is declared with `total=False`, so every key may be omitted.
    """

    # Name parts: each is a nested argument dict expanded through `default_bib_string`.
    given_name: BibStringArgs
    family_name: BibStringArgs
    mononym: BibStringArgs
    shorthand: BibStringArgs
    famous_name: BibStringArgs

    # Passed to `Author` unchanged.
    publications: Tuple[BibItem, ...]
    id: int | None
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def default_author(**kwargs: Unpack[AuthorArgs]) -> Author:
    """
    Build an Author from any subset of its attributes.

    Missing name parts are built from an empty argument dict (all strings empty), a missing `publications`
    becomes an empty tuple, and a missing `id` becomes None.
    """
    given = default_bib_string(**kwargs.get("given_name", {}))
    family = default_bib_string(**kwargs.get("family_name", {}))
    mononym = default_bib_string(**kwargs.get("mononym", {}))
    shorthand = default_bib_string(**kwargs.get("shorthand", {}))
    famous = default_bib_string(**kwargs.get("famous_name", {}))

    return Author(
        given_name=given,
        family_name=family,
        mononym=mononym,
        shorthand=shorthand,
        famous_name=famous,
        publications=kwargs.get("publications", ()),
        id=kwargs.get("id"),
    )
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
############
|
|
100
|
+
# Journal
|
|
101
|
+
############
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
class JournalArgs(TypedDict, total=False):
    """
    Keyword arguments accepted by `default_journal`.

    The dict is declared with `total=False`, so every key may be omitted.
    """

    # Nested arguments, expanded through `default_bib_string`.
    name: BibStringArgs

    # Passed to `Journal` unchanged.
    issn_print: str
    issn_electronic: str
    id: int | None
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def default_journal(**kwargs: Unpack[JournalArgs]) -> Journal | None:
|
|
112
|
+
"""
|
|
113
|
+
Create a default Journal object, given a dictionary with any (or None) of its attributes. Defaults to empty strings if not provided.
|
|
114
|
+
"""
|
|
115
|
+
if kwargs == {}:
|
|
116
|
+
return None
|
|
117
|
+
|
|
118
|
+
return Journal(
|
|
119
|
+
name=default_bib_string(**kwargs.get("name", {})),
|
|
120
|
+
issn_print=kwargs.get("issn_print", ""),
|
|
121
|
+
issn_electronic=kwargs.get("issn_electronic", ""),
|
|
122
|
+
id=kwargs.get("id", None),
|
|
123
|
+
)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
############
|
|
127
|
+
# Support Args
|
|
128
|
+
############
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
class PageArgs(TypedDict, total=False):
    """
    Keyword arguments accepted by `default_page`.

    The dict is declared with `total=False`, so every key may be omitted.
    """

    # Both values are forwarded to `PageAttr` as strings.
    start: str
    end: str
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def default_page(**kwargs: Unpack[PageArgs]) -> PageAttr:
    """
    Build a PageAttr from any subset of its attributes.

    `start` and `end` each fall back to the empty string when not supplied.
    """
    start = kwargs.get("start", "")
    end = kwargs.get("end", "")
    return PageAttr(start=start, end=end)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
class KeywordsArgs(TypedDict, total=False):
    """
    Keyword arguments accepted by `default_keywords`.

    The dict is declared with `total=False`, so every key may be omitted.
    """

    # Each value is used as the `name` of a `Keyword` by `default_keywords`.
    level_1: str
    level_2: str
    level_3: str
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def default_keywords(**kwargs: Unpack[KeywordsArgs]) -> KeywordsAttr:
    """
    Build a KeywordsAttr from any subset of its three levels.

    Every level is wrapped in a Keyword; a level that is not supplied gets a Keyword whose name is the empty string.
    """
    first = Keyword(name=kwargs.get("level_1", ""))
    second = Keyword(name=kwargs.get("level_2", ""))
    third = Keyword(name=kwargs.get("level_3", ""))
    return KeywordsAttr(level_1=first, level_2=second, level_3=third)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
class BibItemDateArgs(TypedDict, total=False):
|
|
158
|
+
year: int
|
|
159
|
+
year_part_2_hyphen: int | None
|
|
160
|
+
year_part_2_slash: int | None
|
|
161
|
+
month: int | None
|
|
162
|
+
day: int | None
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def default_bib_item_date(**kwargs: Unpack[BibItemDateArgs]) -> BibItemDateAttr:
    """
    Build a BibItemDateAttr from any subset of its attributes.

    A missing `year` becomes 0; every other missing component becomes None.
    """
    year = kwargs.get("year", 0)
    hyphen_part = kwargs.get("year_part_2_hyphen", None)
    slash_part = kwargs.get("year_part_2_slash", None)
    month = kwargs.get("month", None)
    day = kwargs.get("day", None)

    return BibItemDateAttr(
        year=year,
        year_part_2_hyphen=hyphen_part,
        year_part_2_slash=slash_part,
        month=month,
        day=day,
    )
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def parse_date(date: BibItemDateArgs | Literal["no date"]) -> BibItemDateAttr | Literal["no date"]:
    """
    Turn date arguments into a BibItemDateAttr, passing the "no date" marker through.

    A dict is expanded into `default_bib_item_date`; any other value yields the literal "no date".
    """
    if not isinstance(date, dict):
        return "no date"
    return default_bib_item_date(**date)
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
class BibKeyArgs(TypedDict, total=False):
    """
    Keyword arguments accepted by `default_bib_key`.

    The dict is declared with `total=False`, so every key may be omitted.
    """

    first_author: str
    other_authors: str

    # Either an integer or one of the TBasicPubState values.
    date: int | TBasicPubState
    date_suffix: str
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def default_bib_key(**kwargs: Unpack[BibKeyArgs]) -> BibKeyAttr:
    """
    Build a BibKeyAttr from any subset of its attributes.

    Every attribute that is not supplied, `date` included, falls back to the empty string.
    """
    return BibKeyAttr(
        date_suffix=kwargs.get("date_suffix", ""),
        date=kwargs.get("date", ""),
        other_authors=kwargs.get("other_authors", ""),
        first_author=kwargs.get("first_author", ""),
    )
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
############
|
|
200
|
+
# BibItem Args
|
|
201
|
+
############
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
class BibItemArgs(TypedDict, total=False):
    """
    Keyword arguments accepted by `default_bib_item`.

    The dict is declared with `total=False`, so every key may be omitted. Keys written with a leading
    underscore are passed to `BibItem` under the same name without the underscore.
    """

    _to_do_general: str
    _change_request: str

    # Identification, contributors and date
    entry_type: TBibTeXEntryType
    bibkey: BibKeyArgs
    author: Tuple[AuthorArgs, ...]
    editor: Tuple[AuthorArgs, ...]
    options: Tuple[str, ...]
    date: BibItemDateArgs | Literal["no date"]
    pubstate: TPubState

    # Titles and container
    title: BibStringArgs
    booktitle: BibStringArgs
    # crossref: dict
    journal: JournalArgs
    volume: str
    number: str
    pages: Tuple[PageArgs, ...]
    eid: str
    series: BaseNamedRenderableArgs

    # Publication details
    address: BibStringArgs
    institution: BibStringArgs
    school: BibStringArgs
    publisher: BibStringArgs
    type: BibStringArgs
    edition: int
    note: BibStringArgs
    issuetitle: BibStringArgs
    _guesteditor: Tuple[AuthorArgs, ...]
    _extra_note: BibStringArgs

    # External identifiers
    urn: str
    eprint: str
    doi: str
    url: str

    # Remaining underscore-prefixed fields
    _kws: KeywordsArgs
    _epoch: TEpoch
    _person: AuthorArgs
    _comm_for_profile_bib: str
    _langid: TLanguageID
    _lang_der: str
    _further_refs: Tuple[BibKeyArgs, ...]
    _depends_on: Tuple[BibKeyArgs, ...]
    _dltc_num: int
    _spec_interest: str
    _note_perso: str
    _note_stock: str
    _note_status: str
    _num_inwork_coll: int
    _num_inwork: str
    _num_coll: int
    _dltc_copyediting_note: str
    _note_missing: str
    _num_sort: int
    id: int
    _bib_info_source: str
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def default_bib_item(**kwargs: Unpack[BibItemArgs]) -> BibItem:
    """
    Build a BibItem from any subset of the keys declared in BibItemArgs.

    Defaults applied to keys that are not supplied:
      - plain string fields become "";
      - `entry_type` becomes "UNKNOWN" and `date` becomes "no date";
      - tuple fields (authors, editors, guest editors, options, pages, further_refs, depends_on) become empty tuples;
      - nested fields built through a `default_*` helper become "" (`journal` becomes None);
      - `edition`, `dltc_num`, `num_inwork_coll`, `num_coll`, `num_sort` and `id` become None.

    `crossref` has no counterpart in BibItemArgs and is always set to "".
    """
    return BibItem(
        to_do_general=kwargs.get("_to_do_general", ""),
        change_request=kwargs.get("_change_request", ""),
        entry_type=kwargs.get("entry_type", "UNKNOWN"),
        bibkey=default_bib_key(**kwargs["bibkey"]) if "bibkey" in kwargs else "",
        author=tuple(default_author(**author_args) for author_args in kwargs.get("author", ())),
        editor=tuple(default_author(**editor_args) for editor_args in kwargs.get("editor", ())),
        options=kwargs.get("options", ()),
        date=parse_date(kwargs.get("date", "no date")),
        pubstate=kwargs.get("pubstate", ""),
        title=default_bib_string(**kwargs["title"]) if "title" in kwargs else "",
        booktitle=default_bib_string(**kwargs["booktitle"]) if "booktitle" in kwargs else "",
        crossref="",  # BibItemArgs declares no crossref key, so the field is always left empty
        journal=default_journal(**kwargs["journal"]) if "journal" in kwargs else None,
        volume=kwargs.get("volume", ""),
        number=kwargs.get("number", ""),
        pages=tuple(default_page(**page_args) for page_args in kwargs.get("pages", ())),
        eid=kwargs.get("eid", ""),
        series=default_base_named_renderable(**kwargs["series"]) if "series" in kwargs else "",
        address=default_bib_string(**kwargs["address"]) if "address" in kwargs else "",
        institution=default_bib_string(**kwargs["institution"]) if "institution" in kwargs else "",
        school=default_bib_string(**kwargs["school"]) if "school" in kwargs else "",
        publisher=default_bib_string(**kwargs["publisher"]) if "publisher" in kwargs else "",
        type=default_bib_string(**kwargs["type"]) if "type" in kwargs else "",
        edition=kwargs.get("edition", None),
        note=default_bib_string(**kwargs["note"]) if "note" in kwargs else "",
        issuetitle=default_bib_string(**kwargs["issuetitle"]) if "issuetitle" in kwargs else "",
        guesteditor=tuple(default_author(**guest_args) for guest_args in kwargs.get("_guesteditor", ())),
        extra_note=default_bib_string(**kwargs["_extra_note"]) if "_extra_note" in kwargs else "",
        urn=kwargs.get("urn", ""),
        eprint=kwargs.get("eprint", ""),
        doi=kwargs.get("doi", ""),
        url=kwargs.get("url", ""),
        kws=default_keywords(**kwargs["_kws"]) if "_kws" in kwargs else "",
        epoch=kwargs.get("_epoch", ""),
        person=default_author(**kwargs["_person"]) if "_person" in kwargs else "",
        comm_for_profile_bib=kwargs.get("_comm_for_profile_bib", ""),
        langid=kwargs.get("_langid", ""),
        lang_der=kwargs.get("_lang_der", ""),
        further_refs=tuple(default_bib_key(**key_args) for key_args in kwargs.get("_further_refs", ())),
        depends_on=tuple(default_bib_key(**key_args) for key_args in kwargs.get("_depends_on", ())),
        dltc_num=kwargs.get("_dltc_num", None),
        spec_interest=kwargs.get("_spec_interest", ""),
        note_perso=kwargs.get("_note_perso", ""),
        note_stock=kwargs.get("_note_stock", ""),
        note_status=kwargs.get("_note_status", ""),
        num_inwork_coll=kwargs.get("_num_inwork_coll", None),
        num_inwork=kwargs.get("_num_inwork", ""),
        num_coll=kwargs.get("_num_coll", None),
        dltc_copyediting_note=kwargs.get("_dltc_copyediting_note", ""),
        note_missing=kwargs.get("_note_missing", ""),
        num_sort=kwargs.get("_num_sort", None),
        id=kwargs.get("id", None),
        bib_info_source=kwargs.get("_bib_info_source", ""),
    )
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Logic functions for bibliography processing."""
|
|
2
|
+
|
|
3
|
+
from philoch_bib_sdk.logic.functions.comparator import (
|
|
4
|
+
compare_bibitems,
|
|
5
|
+
compare_bibitems_detailed,
|
|
6
|
+
)
|
|
7
|
+
from philoch_bib_sdk.logic.functions.fuzzy_matcher import (
|
|
8
|
+
BibItemBlockIndex,
|
|
9
|
+
build_index,
|
|
10
|
+
build_index_cached,
|
|
11
|
+
find_similar_bibitems,
|
|
12
|
+
stage_bibitem,
|
|
13
|
+
stage_bibitems_batch,
|
|
14
|
+
_RUST_SCORER_AVAILABLE,
|
|
15
|
+
)
|
|
16
|
+
from philoch_bib_sdk.logic.functions.journal_article_matcher import (
|
|
17
|
+
get_bibkey_by_journal_volume_number,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
# Names re-exported by this package, listed in the order of the imports above.
__all__ = [
    # from .comparator
    "compare_bibitems",
    "compare_bibitems_detailed",
    # from .fuzzy_matcher
    "BibItemBlockIndex",
    "build_index",
    "build_index_cached",
    "find_similar_bibitems",
    "stage_bibitem",
    "stage_bibitems_batch",
    # from .journal_article_matcher
    "get_bibkey_by_journal_volume_number",
    # from .fuzzy_matcher (underscore-prefixed, but exported explicitly)
    "_RUST_SCORER_AVAILABLE",
]
|