scitex 2.4.2__py3-none-any.whl → 2.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scitex/__version__.py +1 -1
- scitex/browser/__init__.py +53 -0
- scitex/browser/debugging/__init__.py +56 -0
- scitex/browser/debugging/_failure_capture.py +372 -0
- scitex/browser/debugging/_sync_session.py +259 -0
- scitex/browser/debugging/_test_monitor.py +284 -0
- scitex/browser/debugging/_visual_cursor.py +432 -0
- scitex/scholar/citation_graph/database.py +9 -2
- scitex/scholar/config/ScholarConfig.py +23 -3
- scitex/scholar/config/default.yaml +55 -0
- scitex/scholar/core/Paper.py +102 -0
- scitex/scholar/core/__init__.py +44 -0
- scitex/scholar/core/journal_normalizer.py +524 -0
- scitex/scholar/core/oa_cache.py +285 -0
- scitex/scholar/core/open_access.py +457 -0
- scitex/scholar/pdf_download/ScholarPDFDownloader.py +137 -0
- scitex/scholar/pdf_download/strategies/__init__.py +6 -0
- scitex/scholar/pdf_download/strategies/open_access_download.py +186 -0
- scitex/scholar/pipelines/ScholarPipelineSearchParallel.py +18 -3
- scitex/scholar/pipelines/ScholarPipelineSearchSingle.py +15 -2
- {scitex-2.4.2.dist-info → scitex-2.4.3.dist-info}/METADATA +1 -1
- {scitex-2.4.2.dist-info → scitex-2.4.3.dist-info}/RECORD +25 -17
- {scitex-2.4.2.dist-info → scitex-2.4.3.dist-info}/WHEEL +0 -0
- {scitex-2.4.2.dist-info → scitex-2.4.3.dist-info}/entry_points.txt +0 -0
- {scitex-2.4.2.dist-info → scitex-2.4.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,432 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Timestamp: 2025-12-08
|
|
4
|
+
# File: /home/ywatanabe/proj/scitex-code/src/scitex/browser/debugging/_visual_cursor.py
|
|
5
|
+
|
|
6
|
+
"""
|
|
7
|
+
Visual cursor and click effects for E2E test feedback.
|
|
8
|
+
|
|
9
|
+
Provides visual feedback during browser automation:
|
|
10
|
+
- Visual cursor indicator that follows mouse movements
|
|
11
|
+
- Click ripple effects
|
|
12
|
+
- Drag state visualization
|
|
13
|
+
- Step progress messages
|
|
14
|
+
|
|
15
|
+
Works with both async and sync Playwright APIs.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
from typing import TYPE_CHECKING, Union
|
|
21
|
+
|
|
22
|
+
if TYPE_CHECKING:
|
|
23
|
+
from playwright.async_api import Page as AsyncPage
|
|
24
|
+
from playwright.sync_api import Page as SyncPage
|
|
25
|
+
|
|
26
|
+
# Stylesheet for every overlay element created by the helpers in this module.
# It is spliced verbatim into a JavaScript template literal (see
# INJECT_EFFECTS_JS), so it must never contain a backtick or a "${" sequence.
VISUAL_EFFECTS_CSS = """
/* Visual cursor indicator */
#_scitex_cursor {
    position: fixed;
    width: 24px;
    height: 24px;
    border: 3px solid #FF4444;
    border-radius: 50%;
    pointer-events: none;
    z-index: 2147483647;
    transform: translate(-50%, -50%);
    transition: all 0.15s ease-out;
    box-shadow: 0 0 15px rgba(255, 68, 68, 0.6);
    display: none;
}
#_scitex_cursor.clicking {
    transform: translate(-50%, -50%) scale(0.6);
    background: rgba(255, 68, 68, 0.4);
    box-shadow: 0 0 25px rgba(255, 68, 68, 0.8);
}
#_scitex_cursor.dragging {
    border-color: #28A745;
    box-shadow: 0 0 15px rgba(40, 167, 69, 0.6);
    width: 28px;
    height: 28px;
}

/* Click ripple effect */
.scitex-click-ripple {
    position: fixed;
    border-radius: 50%;
    border: 3px solid #FF4444;
    pointer-events: none;
    z-index: 2147483646;
    animation: clickRipple 0.5s ease-out forwards;
}
@keyframes clickRipple {
    0% { width: 0; height: 0; opacity: 1; transform: translate(-50%, -50%); }
    100% { width: 80px; height: 80px; opacity: 0; transform: translate(-50%, -50%); }
}

/* Step message container */
#_scitex_step_messages {
    position: fixed;
    top: 10px;
    left: 10px;
    z-index: 2147483647;
    display: flex;
    flex-direction: column;
    gap: 8px;
    max-width: 600px;
    pointer-events: none;
}
.scitex-step-msg {
    background: rgba(0, 0, 0, 0.9);
    color: white;
    padding: 14px 24px;
    border-radius: 8px;
    font-size: 16px;
    font-family: 'Courier New', monospace;
    box-shadow: 0 4px 12px rgba(0, 0, 0, 0.5);
    word-wrap: break-word;
    animation: stepSlideIn 0.3s ease-out;
}
@keyframes stepSlideIn {
    0% { opacity: 0; transform: translateX(-20px); }
    100% { opacity: 1; transform: translateX(0); }
}

/* Test result banner */
#_scitex_result_banner {
    position: fixed;
    top: 50%;
    left: 50%;
    transform: translate(-50%, -50%);
    padding: 40px 80px;
    border-radius: 16px;
    font-size: 48px;
    font-weight: bold;
    font-family: 'Arial', sans-serif;
    z-index: 2147483647;
    pointer-events: none;
    animation: resultPulse 0.5s ease-out;
}
#_scitex_result_banner.success {
    background: rgba(40, 167, 69, 0.95);
    color: white;
    box-shadow: 0 0 50px rgba(40, 167, 69, 0.8);
}
#_scitex_result_banner.failure {
    background: rgba(220, 53, 69, 0.95);
    color: white;
    box-shadow: 0 0 50px rgba(220, 53, 69, 0.8);
}
@keyframes resultPulse {
    0% { transform: translate(-50%, -50%) scale(0.5); opacity: 0; }
    50% { transform: translate(-50%, -50%) scale(1.1); }
    100% { transform: translate(-50%, -50%) scale(1); opacity: 1; }
}
"""

# JavaScript to inject visual effects.
# Each of the three pieces (stylesheet, cursor, message container) is guarded
# by its own id check, so the function is idempotent and never produces a
# second element with the same id when show_cursor_at / show_step already
# created the cursor or the container on demand.
INJECT_EFFECTS_JS = f"""
() => {{
    // Install the stylesheet once per document
    if (!document.getElementById('_scitex_visual_effects')) {{
        const style = document.createElement('style');
        style.id = '_scitex_visual_effects';
        style.textContent = `{VISUAL_EFFECTS_CSS}`;
        document.head.appendChild(style);
    }}

    // Create cursor element unless one already exists
    if (!document.getElementById('_scitex_cursor')) {{
        const cursor = document.createElement('div');
        cursor.id = '_scitex_cursor';
        document.body.appendChild(cursor);
    }}

    // Create step messages container unless one already exists
    if (!document.getElementById('_scitex_step_messages')) {{
        const msgContainer = document.createElement('div');
        msgContainer.id = '_scitex_step_messages';
        document.body.appendChild(msgContainer);
    }}
}}
"""
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def inject_visual_effects(page: Union["AsyncPage", "SyncPage"]) -> None:
    """Install the overlay stylesheet and elements on ``page`` (sync version).

    Evaluates ``INJECT_EFFECTS_JS`` in the page. The call is not awaited, so
    this variant is the one to use with the sync Playwright API.

    Args:
        page: Playwright page object.
    """
    injection_script = INJECT_EFFECTS_JS
    page.evaluate(injection_script)
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
async def inject_visual_effects_async(page: "AsyncPage") -> None:
    """Install the overlay stylesheet and elements on ``page`` (async version).

    Awaits the evaluation of ``INJECT_EFFECTS_JS`` in the page.

    Args:
        page: Playwright page object from the async API.
    """
    injection_script = INJECT_EFFECTS_JS
    await page.evaluate(injection_script)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def show_cursor_at(
    page: Union["AsyncPage", "SyncPage"],
    x: float,
    y: float,
    state: str = "normal",
) -> None:
    """Move visual cursor to position (sync version).

    The ``#_scitex_cursor`` element is created on demand when it is missing,
    made visible, and positioned through its ``left`` / ``top`` styles. Its
    colour and glow come from the ``clicking`` / ``dragging`` classes, so no
    inline colour or shadow is written here: inline declarations would
    outrank the class rules and hide the stronger ``clicking`` glow.

    Args:
        page: Playwright page object (the call is not awaited).
        x: X coordinate, applied as ``left`` in pixels.
        y: Y coordinate, applied as ``top`` in pixels.
        state: Cursor state - "normal", "clicking", or "dragging". Any other
            value is rendered like "normal".
    """
    page.evaluate(
        """
        ([x, y, state]) => {
            let cursor = document.getElementById('_scitex_cursor');
            if (!cursor) {
                cursor = document.createElement('div');
                cursor.id = '_scitex_cursor';
                document.body.appendChild(cursor);
            }
            cursor.style.display = 'block';
            cursor.style.left = x + 'px';
            cursor.style.top = y + 'px';
            // The class alone selects the look; see the injected stylesheet.
            cursor.className = state === 'clicking' ? 'clicking' :
                               state === 'dragging' ? 'dragging' : '';
        }
        """,
        [x, y, state],
    )
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
async def show_cursor_at_async(
    page: "AsyncPage",
    x: float,
    y: float,
    state: str = "normal",
) -> None:
    """Move visual cursor to position (async version).

    The ``#_scitex_cursor`` element is created on demand when it is missing,
    made visible, and positioned through its ``left`` / ``top`` styles. Its
    colour and glow come from the ``clicking`` / ``dragging`` classes, so no
    inline colour or shadow is written here: inline declarations would
    outrank the class rules and hide the stronger ``clicking`` glow.

    Args:
        page: Playwright page object from the async API.
        x: X coordinate, applied as ``left`` in pixels.
        y: Y coordinate, applied as ``top`` in pixels.
        state: Cursor state - "normal", "clicking", or "dragging". Any other
            value is rendered like "normal".
    """
    await page.evaluate(
        """
        ([x, y, state]) => {
            let cursor = document.getElementById('_scitex_cursor');
            if (!cursor) {
                cursor = document.createElement('div');
                cursor.id = '_scitex_cursor';
                document.body.appendChild(cursor);
            }
            cursor.style.display = 'block';
            cursor.style.left = x + 'px';
            cursor.style.top = y + 'px';
            // The class alone selects the look; see the injected stylesheet.
            cursor.className = state === 'clicking' ? 'clicking' :
                               state === 'dragging' ? 'dragging' : '';
        }
        """,
        [x, y, state],
    )
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def show_click_effect(page: Union["AsyncPage", "SyncPage"], x: float, y: float) -> None:
    """Play the click ripple at a position (sync version).

    Appends a ``scitex-click-ripple`` element at ``(x, y)`` and removes it
    after 600 ms. If the visual cursor element exists, it also receives the
    ``clicking`` class for 150 ms.

    Args:
        page: Playwright page object (the call is not awaited).
        x: X coordinate, applied as ``left`` in pixels.
        y: Y coordinate, applied as ``top`` in pixels.
    """
    ripple_script = """
        ([x, y]) => {
            const ripple = document.createElement('div');
            ripple.className = 'scitex-click-ripple';
            ripple.style.left = x + 'px';
            ripple.style.top = y + 'px';
            document.body.appendChild(ripple);
            setTimeout(() => ripple.remove(), 600);

            const cursor = document.getElementById('_scitex_cursor');
            if (cursor) {
                cursor.classList.add('clicking');
                setTimeout(() => cursor.classList.remove('clicking'), 150);
            }
        }
    """
    position = [x, y]
    page.evaluate(ripple_script, position)
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
async def show_click_effect_async(page: "AsyncPage", x: float, y: float) -> None:
    """Play the click ripple at a position (async version).

    Appends a ``scitex-click-ripple`` element at ``(x, y)`` and removes it
    after 600 ms. If the visual cursor element exists, it also receives the
    ``clicking`` class for 150 ms.

    Args:
        page: Playwright page object from the async API.
        x: X coordinate, applied as ``left`` in pixels.
        y: Y coordinate, applied as ``top`` in pixels.
    """
    ripple_script = """
        ([x, y]) => {
            const ripple = document.createElement('div');
            ripple.className = 'scitex-click-ripple';
            ripple.style.left = x + 'px';
            ripple.style.top = y + 'px';
            document.body.appendChild(ripple);
            setTimeout(() => ripple.remove(), 600);

            const cursor = document.getElementById('_scitex_cursor');
            if (cursor) {
                cursor.classList.add('clicking');
                setTimeout(() => cursor.classList.remove('clicking'), 150);
            }
        }
    """
    position = [x, y]
    await page.evaluate(ripple_script, position)
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def show_step(
    page: Union["AsyncPage", "SyncPage"],
    step: int,
    total: int,
    message: str,
    level: str = "info",
) -> None:
    """Show numbered step message in browser (sync version).

    Appends a ``[step/total] message`` popup to the ``#_scitex_step_messages``
    container (created on demand with inline styles), keeps at most the five
    newest popups, removes each popup after 8 seconds, and then waits 200 ms
    on the page.

    The message is written with ``textContent``, never parsed as HTML, so
    characters such as ``<`` and ``&`` are displayed literally and cannot
    inject markup into the page under test.

    Args:
        page: Playwright page object (calls are not awaited).
        step: Current step number
        total: Total number of steps
        message: Message to display (treated as plain text)
        level: Message level - "info", "success", "warning", or "error".
            Unknown levels use the "info" colour.
    """
    color_map = {
        "info": "#17A2B8",
        "success": "#28A745",
        "warning": "#FFC107",
        "error": "#DC3545",
    }
    color = color_map.get(level, color_map["info"])

    page.evaluate(
        """
        ([step, total, message, color]) => {
            let container = document.getElementById('_scitex_step_messages');
            if (!container) {
                container = document.createElement('div');
                container.id = '_scitex_step_messages';
                container.style.cssText = `
                    position: fixed; top: 10px; left: 10px; z-index: 2147483647;
                    display: flex; flex-direction: column; gap: 8px;
                    max-width: 600px; pointer-events: none;
                `;
                document.body.appendChild(container);
            }
            const popup = document.createElement('div');
            popup.className = 'scitex-step-msg';
            // Plain-text label: the message must not be interpreted as HTML.
            const label = document.createElement('strong');
            label.textContent = `[${step}/${total}] ${message}`;
            popup.appendChild(label);
            popup.style.cssText = `
                background: rgba(0, 0, 0, 0.9); color: white;
                padding: 14px 24px; border-radius: 8px; font-size: 16px;
                font-family: 'Courier New', monospace;
                box-shadow: 0 4px 12px rgba(0, 0, 0, 0.5);
                border-left: 6px solid ${color}; word-wrap: break-word;
            `;
            container.appendChild(popup);
            while (container.children.length > 5) container.removeChild(container.firstChild);
            setTimeout(() => { if (popup.parentNode) popup.parentNode.removeChild(popup); }, 8000);
        }
        """,
        [step, total, message, color],
    )
    # Short pause after the popup is added.
    page.wait_for_timeout(200)
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
async def show_step_async(
    page: "AsyncPage",
    step: int,
    total: int,
    message: str,
    level: str = "info",
) -> None:
    """Show numbered step message in browser (async version).

    Appends a ``[step/total] message`` popup to the ``#_scitex_step_messages``
    container (created on demand with inline styles), keeps at most the five
    newest popups, removes each popup after 8 seconds, and then waits 200 ms
    on the page.

    The message is written with ``textContent``, never parsed as HTML, so
    characters such as ``<`` and ``&`` are displayed literally and cannot
    inject markup into the page under test.

    Args:
        page: Playwright page object from the async API.
        step: Current step number
        total: Total number of steps
        message: Message to display (treated as plain text)
        level: Message level - "info", "success", "warning", or "error".
            Unknown levels use the "info" colour.
    """
    color_map = {
        "info": "#17A2B8",
        "success": "#28A745",
        "warning": "#FFC107",
        "error": "#DC3545",
    }
    color = color_map.get(level, color_map["info"])

    await page.evaluate(
        """
        ([step, total, message, color]) => {
            let container = document.getElementById('_scitex_step_messages');
            if (!container) {
                container = document.createElement('div');
                container.id = '_scitex_step_messages';
                container.style.cssText = `
                    position: fixed; top: 10px; left: 10px; z-index: 2147483647;
                    display: flex; flex-direction: column; gap: 8px;
                    max-width: 600px; pointer-events: none;
                `;
                document.body.appendChild(container);
            }
            const popup = document.createElement('div');
            popup.className = 'scitex-step-msg';
            // Plain-text label: the message must not be interpreted as HTML.
            const label = document.createElement('strong');
            label.textContent = `[${step}/${total}] ${message}`;
            popup.appendChild(label);
            popup.style.cssText = `
                background: rgba(0, 0, 0, 0.9); color: white;
                padding: 14px 24px; border-radius: 8px; font-size: 16px;
                font-family: 'Courier New', monospace;
                box-shadow: 0 4px 12px rgba(0, 0, 0, 0.5);
                border-left: 6px solid ${color}; word-wrap: break-word;
            `;
            container.appendChild(popup);
            while (container.children.length > 5) container.removeChild(container.firstChild);
            setTimeout(() => { if (popup.parentNode) popup.parentNode.removeChild(popup); }, 8000);
        }
        """,
        [step, total, message, color],
    )
    # Short pause after the popup is added.
    await page.wait_for_timeout(200)
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def show_test_result(
    page: Union["AsyncPage", "SyncPage"],
    success: bool,
    message: str = "",
    delay_ms: int = 3000,
) -> None:
    """Display the PASS/FAIL banner, then pause (sync version).

    Any existing ``#_scitex_result_banner`` is removed first; the new banner
    carries the ``success`` or ``failure`` class and its text is set through
    ``textContent``.

    Args:
        page: Playwright page object (calls are not awaited).
        success: True shows "PASS", False shows "FAIL".
        message: Optional text appended as ``": <message>"``.
        delay_ms: Milliseconds to wait on the page after the banner is shown.
    """
    if success:
        status, css_class = "PASS", "success"
    else:
        status, css_class = "FAIL", "failure"
    display_text = f"{status}: {message}" if message else status

    banner_script = """
        ([displayText, cssClass]) => {
            // Remove existing banner
            const existing = document.getElementById('_scitex_result_banner');
            if (existing) existing.remove();

            const banner = document.createElement('div');
            banner.id = '_scitex_result_banner';
            banner.className = cssClass;
            banner.textContent = displayText;
            document.body.appendChild(banner);
        }
    """
    page.evaluate(banner_script, [display_text, css_class])
    page.wait_for_timeout(delay_ms)
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
async def show_test_result_async(
    page: "AsyncPage",
    success: bool,
    message: str = "",
    delay_ms: int = 3000,
) -> None:
    """Display the PASS/FAIL banner, then pause (async version).

    Any existing ``#_scitex_result_banner`` is removed first; the new banner
    carries the ``success`` or ``failure`` class and its text is set through
    ``textContent``.

    Args:
        page: Playwright page object from the async API.
        success: True shows "PASS", False shows "FAIL".
        message: Optional text appended as ``": <message>"``.
        delay_ms: Milliseconds to wait on the page after the banner is shown.
    """
    if success:
        status, css_class = "PASS", "success"
    else:
        status, css_class = "FAIL", "failure"
    display_text = f"{status}: {message}" if message else status

    banner_script = """
        ([displayText, cssClass]) => {
            const existing = document.getElementById('_scitex_result_banner');
            if (existing) existing.remove();

            const banner = document.createElement('div');
            banner.id = '_scitex_result_banner';
            banner.className = cssClass;
            banner.textContent = displayText;
            document.body.appendChild(banner);
        }
    """
    await page.evaluate(banner_script, [display_text, css_class])
    await page.wait_for_timeout(delay_ms)
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
# EOF
|
|
@@ -44,9 +44,16 @@ class CitationDatabase:
|
|
|
44
44
|
read_only: If True, open in read-only mode (default)
|
|
45
45
|
"""
|
|
46
46
|
if read_only:
|
|
47
|
-
self.conn = sqlite3.connect(
|
|
47
|
+
self.conn = sqlite3.connect(
|
|
48
|
+
f"file:{self.db_path}?mode=ro",
|
|
49
|
+
uri=True,
|
|
50
|
+
check_same_thread=False # Allow multi-threaded access (e.g., Django)
|
|
51
|
+
)
|
|
48
52
|
else:
|
|
49
|
-
self.conn = sqlite3.connect(
|
|
53
|
+
self.conn = sqlite3.connect(
|
|
54
|
+
self.db_path,
|
|
55
|
+
check_same_thread=False
|
|
56
|
+
)
|
|
50
57
|
|
|
51
58
|
self.conn.row_factory = sqlite3.Row
|
|
52
59
|
|
|
@@ -29,8 +29,22 @@ logger = getLogger(__name__)
|
|
|
29
29
|
|
|
30
30
|
|
|
31
31
|
class ScholarConfig:
|
|
32
|
-
def __init__(
|
|
32
|
+
def __init__(
|
|
33
|
+
self,
|
|
34
|
+
config_path: Optional[Union[str, Path]] = None,
|
|
35
|
+
scholar_dir: Optional[Union[str, Path]] = None,
|
|
36
|
+
):
|
|
37
|
+
"""Initialize ScholarConfig.
|
|
38
|
+
|
|
39
|
+
Args:
|
|
40
|
+
config_path: Path to custom config YAML file
|
|
41
|
+
scholar_dir: Direct path to scholar directory (e.g., /data/users/alice/.scitex)
|
|
42
|
+
This bypasses SCITEX_DIR env var for thread-safe multi-user usage.
|
|
43
|
+
Use this in Django/multi-user environments to avoid race conditions.
|
|
44
|
+
"""
|
|
33
45
|
self.name = self.__class__.__name__
|
|
46
|
+
self._explicit_scholar_dir = scholar_dir # Store for thread-safe access
|
|
47
|
+
|
|
34
48
|
if config_path and Path(config_path).exists():
|
|
35
49
|
config_data = self.load_yaml(config_path)
|
|
36
50
|
else:
|
|
@@ -114,8 +128,14 @@ class ScholarConfig:
|
|
|
114
128
|
|
|
115
129
|
# Path Management ----------------------------------------
|
|
116
130
|
def _setup_path_manager(self, scholar_dir=None):
    """Create ``self.path_manager`` rooted at ``<base dir>/scholar``.

    The base directory is ``self._explicit_scholar_dir`` when that is truthy;
    otherwise it is whatever ``self.cascade.resolve("scholar_dir", ...)``
    returns, with ``"~/.scitex"`` passed as the default. ``~`` is expanded
    in both cases.

    NOTE(review): the ``scholar_dir`` argument is never read -- the first
    branch ignores it and the second rebinds the name to the cascade result
    before use. Confirm whether callers expect it to take effect.
    """
    # Priority: explicit parameter > env var > config > default
    if self._explicit_scholar_dir:
        # Use explicitly provided path (thread-safe for multi-user)
        base_path = Path(self._explicit_scholar_dir).expanduser() / "scholar"
    else:
        # Fall back to cascade resolution (uses SCITEX_DIR env var)
        scholar_dir = self.cascade.resolve("scholar_dir", default="~/.scitex")
        base_path = Path(scholar_dir).expanduser() / "scholar"
    self.path_manager = PathManager(scholar_dir=base_path)
|
|
120
140
|
|
|
121
141
|
@property
|
|
@@ -38,6 +38,61 @@ timeout: 60
|
|
|
38
38
|
# Note: Download directory is fixed at {scholar_dir}/library/downloads/
|
|
39
39
|
# Use get_library_downloads_dir() to access it
|
|
40
40
|
|
|
41
|
+
# Open Access & Paywall Options
|
|
42
|
+
# ----------------------------------------
|
|
43
|
+
# Prefer Open Access versions when available
|
|
44
|
+
prefer_open_access: ${SCITEX_SCHOLAR_PREFER_OPEN_ACCESS:-true}
|
|
45
|
+
# Allow attempting paywalled journal downloads (requires authentication)
|
|
46
|
+
# Only enable for local/personal use with valid institutional access
|
|
47
|
+
enable_paywall_access: ${SCITEX_SCHOLAR_ENABLE_PAYWALL_ACCESS:-false}
|
|
48
|
+
# Track paywall bypass attempts in metadata (for transparency)
|
|
49
|
+
track_paywall_attempts: ${SCITEX_SCHOLAR_TRACK_PAYWALL_ATTEMPTS:-true}
|
|
50
|
+
|
|
51
|
+
# Open Access Sources (repositories that are always free to download)
|
|
52
|
+
OPENACCESS_SOURCES:
|
|
53
|
+
- arxiv
|
|
54
|
+
- pmc
|
|
55
|
+
- pubmed_central
|
|
56
|
+
- biorxiv
|
|
57
|
+
- medrxiv
|
|
58
|
+
- chemrxiv
|
|
59
|
+
- doaj
|
|
60
|
+
- plos
|
|
61
|
+
- peerj
|
|
62
|
+
- frontiers
|
|
63
|
+
- mdpi
|
|
64
|
+
- hindawi
|
|
65
|
+
- nature_communications # Most articles are OA
|
|
66
|
+
|
|
67
|
+
# Open Access Journals (partial list, DOAJ has full list)
|
|
68
|
+
# These are journal name patterns (case-insensitive substring match)
|
|
69
|
+
OPENACCESS_JOURNALS:
|
|
70
|
+
- "plos one"
|
|
71
|
+
- "plos biology"
|
|
72
|
+
- "plos medicine"
|
|
73
|
+
- "plos computational biology"
|
|
74
|
+
- "plos genetics"
|
|
75
|
+
- "plos pathogens"
|
|
76
|
+
- "plos neglected tropical diseases"
|
|
77
|
+
- "scientific reports"
|
|
78
|
+
- "nature communications"
|
|
79
|
+
- "elife"
|
|
80
|
+
- "peerj"
|
|
81
|
+
- "frontiers in" # All Frontiers journals
|
|
82
|
+
- "bmc " # All BMC journals
|
|
83
|
+
- "journal of open source software"
|
|
84
|
+
- "f1000research"
|
|
85
|
+
- "gigascience"
|
|
86
|
+
- "cell reports"
|
|
87
|
+
- "science advances"
|
|
88
|
+
- "iscience"
|
|
89
|
+
- "heliyon"
|
|
90
|
+
- "cureus"
|
|
91
|
+
- "jmir" # Journal of Medical Internet Research
|
|
92
|
+
|
|
93
|
+
# Unpaywall API email (required for polite access)
|
|
94
|
+
unpaywall_email: ${SCITEX_SCHOLAR_UNPAYWALL_EMAIL:-"research@scitex.io"}
|
|
95
|
+
|
|
41
96
|
# ----------------------------------------
|
|
42
97
|
# cache
|
|
43
98
|
# ----------------------------------------
|
scitex/scholar/core/Paper.py
CHANGED
|
@@ -284,6 +284,37 @@ class PathMetadata(BaseModel):
|
|
|
284
284
|
validate_assignment = True # Validate on attribute assignment too
|
|
285
285
|
|
|
286
286
|
|
|
287
|
+
class AccessMetadata(BaseModel):
    """Open access and licensing metadata with source tracking.

    Tracks whether a paper is open access and provides URLs for OA versions.
    Also includes license information when available.

    Each value field except the two paywall flags is paired with a
    ``<field>_engines`` list of strings that starts empty; presumably it names
    the sources that supplied the value -- confirm against the code that
    populates it.
    """

    # Tri-state flag: stays None until a value is assigned.
    is_open_access: Optional[bool] = None
    is_open_access_engines: List[str] = Field(default_factory=list)

    oa_status: Optional[str] = None  # gold, green, bronze, hybrid, closed
    oa_status_engines: List[str] = Field(default_factory=list)

    oa_url: Optional[str] = None  # URL to open access version
    oa_url_engines: List[str] = Field(default_factory=list)

    license: Optional[str] = None  # CC-BY, CC-BY-NC, etc.
    license_engines: List[str] = Field(default_factory=list)

    license_url: Optional[str] = None
    license_url_engines: List[str] = Field(default_factory=list)

    # For paywalled journals - opt-in for local/personal users
    # Both flags default to None and have no companion engines list.
    paywall_bypass_attempted: Optional[bool] = None
    paywall_bypass_success: Optional[bool] = None

    class Config:
        populate_by_name = True
        validate_assignment = True  # Validate on attribute assignment too
|
|
316
|
+
|
|
317
|
+
|
|
287
318
|
class SystemMetadata(BaseModel):
|
|
288
319
|
"""System tracking metadata (which engines were used to search)."""
|
|
289
320
|
|
|
@@ -313,6 +344,7 @@ class PaperMetadataStructure(BaseModel):
|
|
|
313
344
|
)
|
|
314
345
|
url: URLMetadata = Field(default_factory=URLMetadata)
|
|
315
346
|
path: PathMetadata = Field(default_factory=PathMetadata)
|
|
347
|
+
access: AccessMetadata = Field(default_factory=AccessMetadata)
|
|
316
348
|
system: SystemMetadata = Field(default_factory=SystemMetadata)
|
|
317
349
|
|
|
318
350
|
class Config:
|
|
@@ -418,6 +450,7 @@ class PaperMetadataStructure(BaseModel):
|
|
|
418
450
|
),
|
|
419
451
|
"url": self.url.model_dump(by_alias=True, **kwargs),
|
|
420
452
|
"path": self.path.model_dump(by_alias=True, **kwargs),
|
|
453
|
+
"access": self.access.model_dump(by_alias=True, **kwargs),
|
|
421
454
|
"system": self.system.model_dump(by_alias=True, **kwargs),
|
|
422
455
|
}
|
|
423
456
|
|
|
@@ -489,6 +522,75 @@ class Paper(BaseModel):
|
|
|
489
522
|
"""
|
|
490
523
|
return self.model_dump()
|
|
491
524
|
|
|
525
|
+
def detect_open_access(
    self,
    use_unpaywall: bool = False,
    update_metadata: bool = True,
) -> "OAResult":
    """
    Determine the open access status of this paper.

    Passes the paper's DOI, arXiv ID, journal and the currently stored
    open-access flag to ``check_oa_status``. PMCID and source are always
    sent as None because the paper does not carry them.

    Args:
        use_unpaywall: Forwarded to ``check_oa_status``; if True, the
            Unpaywall API may be queried for uncertain cases
        update_metadata: If True, copy the result into
            ``self.metadata.access`` and tag every written field's
            ``*_engines`` list with ``detect_oa:<result.source>``

    Returns:
        OAResult with detection results
    """
    from .open_access import check_oa_status, OAResult

    meta = self.metadata
    result = check_oa_status(
        doi=meta.id.doi,
        arxiv_id=meta.id.arxiv_id,
        pmcid=None,  # Not currently in IDMetadata
        source=None,  # Source tracking not in Paper
        journal=meta.publication.journal,
        is_open_access_flag=meta.access.is_open_access,
        use_unpaywall=use_unpaywall,
    )

    if not update_metadata:
        return result

    access = meta.access
    engine_tag = f"detect_oa:{result.source}"

    # The flag is always recorded; the optional fields only when truthy.
    access.is_open_access = result.is_open_access
    access.is_open_access_engines.append(engine_tag)
    if result.status:
        access.oa_status = result.status.value
        access.oa_status_engines.append(engine_tag)
    if result.oa_url:
        access.oa_url = result.oa_url
        access.oa_url_engines.append(engine_tag)
    if result.license:
        access.license = result.license
        access.license_engines.append(engine_tag)

    return result
|
|
577
|
+
|
|
578
|
+
@property
def is_open_access(self) -> bool:
    """Tell whether the paper is open access (quick check without API calls).

    A stored ``metadata.access.is_open_access`` value wins when it is not
    None; otherwise the answer comes from ``detect_oa_from_identifiers``
    applied to the DOI, arXiv ID and journal. Nothing is written back to the
    metadata.
    """
    stored_flag = self.metadata.access.is_open_access
    if stored_flag is not None:
        return stored_flag

    # Quick detection from identifiers
    from .open_access import detect_oa_from_identifiers

    ids = self.metadata.id
    detection = detect_oa_from_identifiers(
        doi=ids.doi,
        arxiv_id=ids.arxiv_id,
        journal=self.metadata.publication.journal,
    )
    return detection.is_open_access
|
|
593
|
+
|
|
492
594
|
|
|
493
595
|
if __name__ == "__main__":
|
|
494
596
|
import json
|