pyannotators-entityfishing 0.6.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyannotators_entityfishing-0.6.3/.gitignore +123 -0
- pyannotators_entityfishing-0.6.3/Dockerfile +18 -0
- pyannotators_entityfishing-0.6.3/Jenkinsfile +386 -0
- pyannotators_entityfishing-0.6.3/MIGRATION.md +82 -0
- pyannotators_entityfishing-0.6.3/PKG-INFO +110 -0
- pyannotators_entityfishing-0.6.3/README.md +64 -0
- pyannotators_entityfishing-0.6.3/pyproject.toml +89 -0
- pyannotators_entityfishing-0.6.3/src/pyannotators_entityfishing/__init__.py +3 -0
- pyannotators_entityfishing-0.6.3/src/pyannotators_entityfishing/ef_client.py +242 -0
- pyannotators_entityfishing-0.6.3/src/pyannotators_entityfishing/entityfishing.py +347 -0
- pyannotators_entityfishing-0.6.3/tests/test_entityfishing.py +485 -0
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
pip-wheel-metadata/
|
|
24
|
+
share/python-wheels/
|
|
25
|
+
*.egg-info/
|
|
26
|
+
.installed.cfg
|
|
27
|
+
*.egg
|
|
28
|
+
MANIFEST
|
|
29
|
+
|
|
30
|
+
# PyInstaller
|
|
31
|
+
*.manifest
|
|
32
|
+
*.spec
|
|
33
|
+
|
|
34
|
+
# Installer logs
|
|
35
|
+
pip-log.txt
|
|
36
|
+
pip-delete-this-directory.txt
|
|
37
|
+
|
|
38
|
+
# Unit test / coverage reports
|
|
39
|
+
htmlcov/
|
|
40
|
+
.tox/
|
|
41
|
+
.nox/
|
|
42
|
+
.coverage
|
|
43
|
+
.coverage.*
|
|
44
|
+
.cache
|
|
45
|
+
nosetests.xml
|
|
46
|
+
coverage.xml
|
|
47
|
+
results.xml
|
|
48
|
+
*.cover
|
|
49
|
+
*.py,cover
|
|
50
|
+
.hypothesis/
|
|
51
|
+
.pytest_cache/
|
|
52
|
+
|
|
53
|
+
# Translations
|
|
54
|
+
*.mo
|
|
55
|
+
*.pot
|
|
56
|
+
|
|
57
|
+
# Django stuff:
|
|
58
|
+
*.log
|
|
59
|
+
local_settings.py
|
|
60
|
+
db.sqlite3
|
|
61
|
+
db.sqlite3-journal
|
|
62
|
+
|
|
63
|
+
# Flask stuff:
|
|
64
|
+
instance/
|
|
65
|
+
.webassets-cache
|
|
66
|
+
|
|
67
|
+
# Scrapy stuff:
|
|
68
|
+
.scrapy
|
|
69
|
+
|
|
70
|
+
# Sphinx documentation
|
|
71
|
+
docs/_build/
|
|
72
|
+
|
|
73
|
+
# PyBuilder
|
|
74
|
+
target/
|
|
75
|
+
|
|
76
|
+
# Jupyter Notebook
|
|
77
|
+
.ipynb_checkpoints
|
|
78
|
+
|
|
79
|
+
# IPython
|
|
80
|
+
profile_default/
|
|
81
|
+
ipython_config.py
|
|
82
|
+
|
|
83
|
+
# pyenv
|
|
84
|
+
.python-version
|
|
85
|
+
|
|
86
|
+
# celery beat schedule file
|
|
87
|
+
celerybeat-schedule
|
|
88
|
+
|
|
89
|
+
# SageMath parsed files
|
|
90
|
+
*.sage.py
|
|
91
|
+
|
|
92
|
+
# Environments
|
|
93
|
+
.env
|
|
94
|
+
.venv
|
|
95
|
+
env/
|
|
96
|
+
venv/
|
|
97
|
+
ENV/
|
|
98
|
+
env.bak/
|
|
99
|
+
venv.bak/
|
|
100
|
+
|
|
101
|
+
# Spyder project settings
|
|
102
|
+
.spyderproject
|
|
103
|
+
.spyproject
|
|
104
|
+
|
|
105
|
+
# Rope project settings
|
|
106
|
+
.ropeproject
|
|
107
|
+
|
|
108
|
+
# mkdocs documentation
|
|
109
|
+
/site
|
|
110
|
+
|
|
111
|
+
# mypy
|
|
112
|
+
.mypy_cache/
|
|
113
|
+
.dmypy.json
|
|
114
|
+
dmypy.json
|
|
115
|
+
|
|
116
|
+
# Pyre type checker
|
|
117
|
+
.pyre/
|
|
118
|
+
|
|
119
|
+
# Specific
|
|
120
|
+
.idea/
|
|
121
|
+
.groovylintrc.json
|
|
122
|
+
.emailNotif
|
|
123
|
+
uv.lock
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
FROM python:3.12-slim-bookworm
|
|
2
|
+
# Install prerequisites
|
|
3
|
+
RUN apt-get update -y && \
|
|
4
|
+
apt-get install -y \
|
|
5
|
+
gcc && \
|
|
6
|
+
apt-get install -y --no-install-recommends \
|
|
7
|
+
g++ \
|
|
8
|
+
git && \
|
|
9
|
+
# Final package index refresh + clean
|
|
10
|
+
apt-get update -y && \
|
|
11
|
+
apt-get clean all -y
|
|
12
|
+
|
|
13
|
+
# Install uv
|
|
14
|
+
RUN pip install uv
|
|
15
|
+
|
|
16
|
+
# Add pyproject.toml + README.md for uv install
|
|
17
|
+
ADD pyproject.toml pyproject.toml
|
|
18
|
+
ADD README.md README.md
|
|
@@ -0,0 +1,386 @@
|
|
|
1
|
+
pipeline {
|
|
2
|
+
environment {
|
|
3
|
+
PATH_HOME = '/home/jenkins'
|
|
4
|
+
TEST_REPORT_DIR = '/root/test-reports'
|
|
5
|
+
PYTHONPYCACHEPREFIX = '/tmp/.pytest_cache'
|
|
6
|
+
PYTHONDONTWRITEBYTECODE = '1'
|
|
7
|
+
JENKINS_UIDGID = '1004:1004'
|
|
8
|
+
|
|
9
|
+
MAJOR_VERSION = '0'
|
|
10
|
+
MINOR_VERSION = '6'
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
agent none
|
|
14
|
+
|
|
15
|
+
triggers {
|
|
16
|
+
upstream(upstreamProjects: 'pymultirole_plugins/' + BRANCH_NAME.replaceAll('/', '%2F'),\
|
|
17
|
+
threshold: hudson.model.Result.SUCCESS)
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
stages {
|
|
21
|
+
stage('Catch build termination') {
|
|
22
|
+
agent {
|
|
23
|
+
node {
|
|
24
|
+
label 'built-in'
|
|
25
|
+
customWorkspace "${PATH_HOME}/${JOB_NAME}"
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
stages {
|
|
29
|
+
stage('Analyse build cause') {
|
|
30
|
+
steps {
|
|
31
|
+
script {
|
|
32
|
+
analyseBuildCause()
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
stage('Generate new version') {
|
|
40
|
+
when {
|
|
41
|
+
environment name: 'SKIP_JOB', value: '0'
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
agent {
|
|
45
|
+
node {
|
|
46
|
+
label 'built-in'
|
|
47
|
+
customWorkspace "${PATH_HOME}/${JOB_NAME}"
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
stages {
|
|
52
|
+
stage('Add credentials') {
|
|
53
|
+
steps {
|
|
54
|
+
script {
|
|
55
|
+
// Add password file for uv publishing
|
|
56
|
+
sh "cp ${PATH_HOME}/.passwd-pypi .env"
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
stage('Commit new version') {
|
|
62
|
+
steps {
|
|
63
|
+
script {
|
|
64
|
+
println("attempt to publish ${JOB_NAME} with version: ${MAJOR_VERSION}.${MINOR_VERSION}.${BUILD_ID}")
|
|
65
|
+
|
|
66
|
+
// push updates of file __init__.py
|
|
67
|
+
withCredentials([gitUsernamePassword(credentialsId: 'bitbucket-user', gitToolName: 'git-tool')]) {
|
|
68
|
+
sh 'git pull'
|
|
69
|
+
sh "echo '\"\"\"Annotator based on entity-fishing\"\"\"' > src/pyannotators_entityfishing/__init__.py"
|
|
70
|
+
sh "echo '' >> src/pyannotators_entityfishing/__init__.py"
|
|
71
|
+
sh "echo '__version__ = \"${MAJOR_VERSION}.${MINOR_VERSION}.${BUILD_ID}\"' >> src/pyannotators_entityfishing/__init__.py"
|
|
72
|
+
sh 'git commit src/pyannotators_entityfishing/__init__.py -m "[Jenkins CI] Commit on version files" || echo "No changes to commit"'
|
|
73
|
+
sh 'git push'
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
stage('Build, test and publish') {
|
|
82
|
+
when {
|
|
83
|
+
beforeAgent true
|
|
84
|
+
environment name: 'SKIP_JOB', value: '0'
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
agent {
|
|
88
|
+
// dockerfile agent
|
|
89
|
+
// Mounted volume for Junit reports
|
|
90
|
+
// - docker: /root/test-reports
|
|
91
|
+
// - host : /tmp/_${JOB_NAME}/test-reports
|
|
92
|
+
dockerfile {
|
|
93
|
+
label 'built-in'
|
|
94
|
+
customWorkspace "${PATH_HOME}/${JOB_NAME}"
|
|
95
|
+
filename 'Dockerfile'
|
|
96
|
+
args "-u root --privileged -v /tmp/_${JOB_NAME}/test-reports:${TEST_REPORT_DIR}"
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
stages {
|
|
101
|
+
stage('Install dependencies') {
|
|
102
|
+
steps {
|
|
103
|
+
sh 'pip install uv'
|
|
104
|
+
sh 'uv sync --extra test'
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
stage('Test & lint python code') {
|
|
109
|
+
steps {
|
|
110
|
+
// remove any previous results.xml file
|
|
111
|
+
sh "rm -f ${TEST_REPORT_DIR}/results.xml"
|
|
112
|
+
sh 'uv run ruff check .'
|
|
113
|
+
sh 'uv run ruff format --check .'
|
|
114
|
+
sh "uv run pytest --junit-xml=${TEST_REPORT_DIR}/results.xml"
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
stage('Publish on PyPI') {
|
|
119
|
+
environment {
|
|
120
|
+
UV_PUBLISH_USERNAME = getUserName '.env'
|
|
121
|
+
UV_PUBLISH_PASSWORD = getUserPass '.env'
|
|
122
|
+
}
|
|
123
|
+
steps {
|
|
124
|
+
// remove any previous folder dist
|
|
125
|
+
sh 'rm -rf dist'
|
|
126
|
+
// pull recent updates of file __init__.py
|
|
127
|
+
withCredentials([gitUsernamePassword(credentialsId: 'bitbucket-user', gitToolName: 'git-tool')]) {
|
|
128
|
+
sh 'git config --global pull.rebase false'
|
|
129
|
+
sh "git config --global --add safe.directory ${WORKSPACE}"
|
|
130
|
+
sh 'git pull'
|
|
131
|
+
}
|
|
132
|
+
// restore ownership of the .git folder to the Jenkins user (JENKINS_UIDGID)
|
|
133
|
+
sh "chown -R ${JENKINS_UIDGID} ${WORKSPACE}/.git"
|
|
134
|
+
// restore ownership of the pulled __init__.py file to the Jenkins user (JENKINS_UIDGID)
|
|
135
|
+
sh "chown ${JENKINS_UIDGID} src/pyannotators_entityfishing/__init__.py"
|
|
136
|
+
// get git status
|
|
137
|
+
sh 'git status'
|
|
138
|
+
// publish on PyPI
|
|
139
|
+
sh '''
|
|
140
|
+
export COMMIT_VERSION=$( cat src/pyannotators_entityfishing/__init__.py|grep version|cut -d '"' -f2|tr -s '[:blank:]' )
|
|
141
|
+
export BUILD_VERSION="${MAJOR_VERSION}"."${MINOR_VERSION}"."${BUILD_ID}"
|
|
142
|
+
if [ "${COMMIT_VERSION}" = "${BUILD_VERSION}" ] ; then uv build && uv publish ; fi
|
|
143
|
+
'''
|
|
144
|
+
// remove current folder dist
|
|
145
|
+
sh 'rm -rf dist'
|
|
146
|
+
// remove current folder .hypothesis
|
|
147
|
+
sh 'rm -rf .hypothesis'
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
post {
|
|
155
|
+
// only triggered when blue or green sign
|
|
156
|
+
success {
|
|
157
|
+
// node is specified here to get an agent
|
|
158
|
+
node('built-in') {
|
|
159
|
+
// keep using customWorkspace to store Junit report
|
|
160
|
+
ws("${PATH_HOME}/${JOB_NAME}") {
|
|
161
|
+
script {
|
|
162
|
+
try {
|
|
163
|
+
sh 'rm -f results.xml'
|
|
164
|
+
sh "cp /tmp/_${JOB_NAME}/test-reports/results.xml results.xml"
|
|
165
|
+
} catch (Exception e) {
|
|
166
|
+
println 'Exception occurred: ' + e.toString()
|
|
167
|
+
}
|
|
168
|
+
try {
|
|
169
|
+
junit 'results.xml'
|
|
170
|
+
} catch (Exception e) {
|
|
171
|
+
println 'Exception occurred: ' + e.toString()
|
|
172
|
+
}
|
|
173
|
+
if (sendEmailNotif("${PATH_HOME}/${JOB_NAME}", "${BUILD_NUMBER}")) {
|
|
174
|
+
println 'sending Success Build notification'
|
|
175
|
+
def CUSTOM_SUBJECT = '[CI - Jenkinzz SUCCESS] ' + CUSTOM_SUBJECT
|
|
176
|
+
emailext(
|
|
177
|
+
mimeType: 'text/html',
|
|
178
|
+
subject: CUSTOM_SUBJECT,
|
|
179
|
+
body: '${DEFAULT_CONTENT}',
|
|
180
|
+
replyTo: '${DEFAULT_REPLYTO}',
|
|
181
|
+
to: '${ADMIN_RECIPIENTS}' + ';' + CUSTOM_RECIPIENTS
|
|
182
|
+
)
|
|
183
|
+
switchEmailNotif(false, BUILD_NUMBER)
|
|
184
|
+
} else {
|
|
185
|
+
println 'preventing Success Build notification'
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
// triggered when red sign
|
|
192
|
+
failure {
|
|
193
|
+
// node is specified here to get an agent
|
|
194
|
+
node('built-in') {
|
|
195
|
+
// keep using customWorkspace to store Junit report
|
|
196
|
+
ws("${PATH_HOME}/${JOB_NAME}") {
|
|
197
|
+
script {
|
|
198
|
+
try {
|
|
199
|
+
sh 'rm -f results.xml'
|
|
200
|
+
sh "cp /tmp/_${JOB_NAME}/test-reports/results.xml results.xml"
|
|
201
|
+
} catch (Exception e) {
|
|
202
|
+
println 'Exception occurred: ' + e.toString()
|
|
203
|
+
}
|
|
204
|
+
try {
|
|
205
|
+
junit 'results.xml'
|
|
206
|
+
} catch (Exception e) {
|
|
207
|
+
println 'Exception occurred: ' + e.toString()
|
|
208
|
+
}
|
|
209
|
+
println 'sending Failure Build notification'
|
|
210
|
+
def CUSTOM_SUBJECT = '[CI - Jenkinzz FAILURE] ' + CUSTOM_SUBJECT
|
|
211
|
+
emailext(
|
|
212
|
+
mimeType: 'text/html',
|
|
213
|
+
subject: CUSTOM_SUBJECT,
|
|
214
|
+
body: '${DEFAULT_CONTENT}',
|
|
215
|
+
replyTo: '${DEFAULT_REPLYTO}',
|
|
216
|
+
to: '${ADMIN_RECIPIENTS}' + ';' + CUSTOM_RECIPIENTS
|
|
217
|
+
)
|
|
218
|
+
}
|
|
219
|
+
}
|
|
220
|
+
}
|
|
221
|
+
}
|
|
222
|
+
// triggered when black sign
|
|
223
|
+
aborted {
|
|
224
|
+
println 'post-declarative message: abort job'
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
// return the value of the FLIT_USERNAME entry in the given file (used as UV_PUBLISH_USERNAME)
|
|
230
|
+
def getUserName(path) {
|
|
231
|
+
def USERNAME = sh(
|
|
232
|
+
script: "grep FLIT_USERNAME ${path}|cut -d '=' -f2",
|
|
233
|
+
returnStdout: true
|
|
234
|
+
).trim()
|
|
235
|
+
return USERNAME
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
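// return the value of the FLIT_PASSWORD entry in the given file (used as UV_PUBLISH_PASSWORD); NOTE(review): `cut -d '=' -f2` drops everything after a second '=' in the value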
|
|
239
|
+
def getUserPass(path) {
|
|
240
|
+
def USERPASS = sh(
|
|
241
|
+
script: "grep FLIT_PASSWORD ${path}|cut -d '=' -f2",
|
|
242
|
+
returnStdout: true
|
|
243
|
+
).trim()
|
|
244
|
+
return USERPASS
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
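// toggle=true: write the build number into .emailNotif to enable the email notification; toggle=false: remove .emailNotif, but only when build equals the current BUILD_NUMBER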
|
|
248
|
+
def switchEmailNotif(toggle, build) {
|
|
249
|
+
if (toggle) {
|
|
250
|
+
sh 'echo ' + build + ' > .emailNotif'
|
|
251
|
+
} else {
|
|
252
|
+
if (build == BUILD_NUMBER) {
|
|
253
|
+
sh 'rm -f .emailNotif'
|
|
254
|
+
}
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
// return true if exactly one .emailNotif file is found under path and the content of path/.emailNotif equals the given build number
|
|
259
|
+
boolean sendEmailNotif(path, build) {
|
|
260
|
+
def emailNotif = sh(
|
|
261
|
+
script: "find ${path} -name '.emailNotif'|wc -l",
|
|
262
|
+
returnStdout: true
|
|
263
|
+
).trim()
|
|
264
|
+
def emailContent = ''
|
|
265
|
+
if (emailNotif == '1') {
|
|
266
|
+
emailContent = sh(
|
|
267
|
+
script: "cat ${path}/.emailNotif",
|
|
268
|
+
returnStdout: true
|
|
269
|
+
).trim()
|
|
270
|
+
}
|
|
271
|
+
return (emailContent == build)
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
def analyseBuildCause() {
|
|
275
|
+
String[] upstreamProjects = ['pyimporters_plugins']
|
|
276
|
+
boolean upstreamRunning = false
|
|
277
|
+
String jobName
|
|
278
|
+
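// iterate over upstreamProjects (NOTE(review): the list names 'pyimporters_plugins' while the upstream trigger watches 'pymultirole_plugins' - confirm which is intended)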
|
|
279
|
+
for (upstream_project in upstreamProjects) {
|
|
280
|
+
Jenkins.instance.getItemByFullName(upstream_project).items.each { repository ->
|
|
281
|
+
boolean isRunning = false
|
|
282
|
+
if ( repository.name == BRANCH_NAME ) {
|
|
283
|
+
repository.allJobs.each { job ->
|
|
284
|
+
job.builds.each { build ->
|
|
285
|
+
if ( build.result == (null) ) {
|
|
286
|
+
jobName = build.parent.parent.name
|
|
287
|
+
isRunning = true
|
|
288
|
+
}
|
|
289
|
+
}
|
|
290
|
+
if ( isRunning ) {
|
|
291
|
+
upstreamRunning = true
|
|
292
|
+
}
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
}
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
boolean lastCommitIsTeam = sh(
|
|
299
|
+
script: 'git log -1 | grep "\\[Jenkins CI\\]"',
|
|
300
|
+
returnStatus: true
|
|
301
|
+
)
|
|
302
|
+
|
|
303
|
+
if (upstreamRunning) {
|
|
304
|
+
println 'Skipping build because upstream job detected (' + jobName + ')'
|
|
305
|
+
env.SKIP_JOB = '1'
|
|
306
|
+
switchEmailNotif(false, 0)
|
|
307
|
+
currentBuild.result = 'NOT_BUILT'
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
boolean isStartedByUser = currentBuild.rawBuild.getCause(hudson.model.Cause$UserIdCause) != null
|
|
311
|
+
if (isStartedByUser && !upstreamRunning) {
|
|
312
|
+
env.SKIP_JOB = '0'
|
|
313
|
+
env.CUSTOM_SUBJECT = JOB_NAME + ' - Manual Build #' + BUILD_NUMBER
|
|
314
|
+
env.CUSTOM_RECIPIENTS = emailextrecipients([[$class: 'RequesterRecipientProvider']])
|
|
315
|
+
switchEmailNotif(true, BUILD_NUMBER)
|
|
316
|
+
println 'Job started by User, proceeding'
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
boolean isStartedByUpstream = currentBuild.rawBuild.getCause(hudson.model.Cause$UpstreamCause) != null
|
|
320
|
+
if (isStartedByUpstream && !upstreamRunning) {
|
|
321
|
+
int changeSetCount = 0
|
|
322
|
+
int ciSkipCount = 0
|
|
323
|
+
String upstreamFullJobName = ''
|
|
324
|
+
for (Run upstreamBuild : currentBuild.upstreamBuilds) {
|
|
325
|
+
upstreamFullJobName = upstreamBuild.rawBuild.fullDisplayName
|
|
326
|
+
if (upstreamBuild.changeSets != null) {
|
|
327
|
+
def changeLogSets = upstreamBuild.changeSets
|
|
328
|
+
for (int i = 0; i < changeLogSets.size(); i++) {
|
|
329
|
+
changeSetCount++
|
|
330
|
+
def entries = changeLogSets[i].items
|
|
331
|
+
for (int j = 0; j < entries.length; j++) {
|
|
332
|
+
def entry = entries[j]
|
|
333
|
+
if (entry.msg.contains('[Jenkins CI]')) {
|
|
334
|
+
ciSkipCount++
|
|
335
|
+
}
|
|
336
|
+
}
|
|
337
|
+
}
|
|
338
|
+
}
|
|
339
|
+
}
|
|
340
|
+
if (changeSetCount > 0 && changeSetCount == ciSkipCount) {
|
|
341
|
+
env.SKIP_JOB = '1'
|
|
342
|
+
switchEmailNotif(false, 0)
|
|
343
|
+
println 'Job started by Upstream [' + upstreamFullJobName + '], with CI commit, skipping'
|
|
344
|
+
currentBuild.result = 'NOT_BUILT'
|
|
345
|
+
} else {
|
|
346
|
+
env.SKIP_JOB = '0'
|
|
347
|
+
env.CUSTOM_SUBJECT = JOB_NAME + ' - Upstream Build #' + BUILD_NUMBER
|
|
348
|
+
env.CUSTOM_RECIPIENTS = emailextrecipients([[$class:'UpstreamComitterRecipientProvider']])
|
|
349
|
+
switchEmailNotif(true, BUILD_NUMBER)
|
|
350
|
+
println 'Job started by Upstream [' + upstreamFullJobName + '], proceeding'
|
|
351
|
+
}
|
|
352
|
+
}
|
|
353
|
+
|
|
354
|
+
boolean isStartedByCommit = currentBuild.rawBuild.getCause(jenkins.branch.BranchEventCause) != null
|
|
355
|
+
if (isStartedByCommit && lastCommitIsTeam && !upstreamRunning) {
|
|
356
|
+
env.SKIP_JOB = '0'
|
|
357
|
+
env.CUSTOM_SUBJECT = JOB_NAME + ' - SCM Build #' + BUILD_NUMBER
|
|
358
|
+
env.CUSTOM_RECIPIENTS = emailextrecipients([[$class: 'DevelopersRecipientProvider'], [$class:'CulpritsRecipientProvider']])
|
|
359
|
+
switchEmailNotif(true, BUILD_NUMBER)
|
|
360
|
+
println 'Job started by User Commit, proceeding'
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
boolean isStartedByCron = currentBuild.rawBuild.getCause(hudson.triggers.TimerTrigger$TimerTriggerCause) != null
|
|
364
|
+
if (isStartedByCron && lastCommitIsTeam && !upstreamRunning) {
|
|
365
|
+
env.SKIP_JOB = '0'
|
|
366
|
+
env.CUSTOM_SUBJECT = JOB_NAME + ' - CRON Build #' + BUILD_NUMBER
|
|
367
|
+
env.CUSTOM_RECIPIENTS = emailextrecipients([[$class: 'DevelopersRecipientProvider'], [$class:'CulpritsRecipientProvider']])
|
|
368
|
+
switchEmailNotif(true, BUILD_NUMBER)
|
|
369
|
+
println 'Job started by Cron, proceeding'
|
|
370
|
+
}
|
|
371
|
+
|
|
372
|
+
boolean isStartedByBranchDiscovery = currentBuild.rawBuild.getCause(jenkins.branch.BranchIndexingCause) != null
|
|
373
|
+
if (isStartedByBranchDiscovery && lastCommitIsTeam && !upstreamRunning) {
|
|
374
|
+
env.SKIP_JOB = '0'
|
|
375
|
+
env.CUSTOM_SUBJECT = JOB_NAME + ' - BranchDiscovery Build #' + BUILD_NUMBER
|
|
376
|
+
env.CUSTOM_RECIPIENTS = emailextrecipients([[$class: 'DevelopersRecipientProvider'], [$class:'CulpritsRecipientProvider']])
|
|
377
|
+
switchEmailNotif(true, BUILD_NUMBER)
|
|
378
|
+
println 'Job started by Branch Discovery, proceeding'
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
if (!lastCommitIsTeam && !upstreamRunning && !isStartedByUser && !isStartedByUpstream) {
|
|
382
|
+
println 'Skipping build because last commit has been done by CI'
|
|
383
|
+
env.SKIP_JOB = '1'
|
|
384
|
+
switchEmailNotif(false, 0)
|
|
385
|
+
}
|
|
386
|
+
}
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# Migration from rest-pysuggester EntityFishing to pyannotators_entityfishing
|
|
2
|
+
|
|
3
|
+
This document describes the migration from the `rest-pysuggester` `EntityFishingSuggester`
|
|
4
|
+
to the standalone `pyannotators_entityfishing` annotator plugin.
|
|
5
|
+
|
|
6
|
+
## Architecture change
|
|
7
|
+
|
|
8
|
+
The original implementation lived inside `rest-pysuggester` as a `Suggester` subclass
|
|
9
|
+
with tight coupling to MongoDB, the Sherpa training pipeline, and a shared resource
|
|
10
|
+
manager. The new implementation is a standalone `AnnotatorBase` plugin following the
|
|
11
|
+
`pymultirole_plugins` v1 contract.
|
|
12
|
+
|
|
13
|
+
### What was kept (upward-compatible)
|
|
14
|
+
|
|
15
|
+
| Feature | Original | New |
|
|
16
|
+
|---------|----------|-----|
|
|
17
|
+
| EntityFishing HTTP client | `ef_client.py` (env-var config) | `ef_client.py` (constructor params from `EntityFishingParameters`) |
|
|
18
|
+
| Disambiguation parameters | `minSelectorScore`, `maxTermFrequency` | Same field names and defaults |
|
|
19
|
+
| Label mapping | `mapped_labels` (dict of JSON mongoquery) | Same format, same `mongoquery` library |
|
|
20
|
+
| Default label | `default_label` | Same |
|
|
21
|
+
| Output label filtering | `output_labels` | Same |
|
|
22
|
+
| Fingerprinting | `fingerprint`, `wikidata_properties`, `multivalued_props` | Same |
|
|
23
|
+
| Chunking | `do_chunking` | Same |
|
|
24
|
+
| Short text mode | `short_text` | Same |
|
|
25
|
+
| Noun filtering | `noun_forms_only` (default True) | Same (default changed to False, spacy optional) |
|
|
26
|
+
| Overlap filtering | `filter_annotations` (longest match) | Same algorithm |
|
|
27
|
+
| Annotation output | `Annotation` with `Term(identifier=QID, lexicon="wikidata", ...)` | Same schema |
|
|
28
|
+
| Parallel requests | `requests-futures` / `FuturesSession` | Same |
|
|
29
|
+
| Caching | `requests-cache` / `CachedSession` | Same (disabled by default) |
|
|
30
|
+
|
|
31
|
+
### What changed
|
|
32
|
+
|
|
33
|
+
1. **No training pipeline**: The annotator is stateless. There is no `train_model_unsafe`
|
|
34
|
+
or model persistence. The label classes are derived from `mapped_labels` +
|
|
35
|
+
`default_label` + `output_labels` at annotation time.
|
|
36
|
+
|
|
37
|
+
2. **No MongoDB integration**: Documents are passed in and returned directly.
|
|
38
|
+
|
|
39
|
+
3. **No spacy by default**: `noun_forms_only` defaults to `False` (was `True`).
|
|
40
|
+
When enabled, spacy is imported lazily and a language-specific model is loaded.
|
|
41
|
+
Install spacy separately: `pip install "pyannotators-entityfishing[spacy]"` (quote the extra so the shell does not expand the brackets).
|
|
42
|
+
|
|
43
|
+
4. **Configuration via parameters**: The EntityFishing service URL, pool size, and
|
|
44
|
+
caching settings are now fields on `EntityFishingParameters` instead of environment
|
|
45
|
+
variables. This makes the annotator self-contained and testable.
|
|
46
|
+
|
|
47
|
+
5. **Classification mode not ported**: The `EntityFishingClassifierSuggester` and
|
|
48
|
+
`EntityFishingDispatcherSuggester` were not ported. The classifier mode was
|
|
49
|
+
incomplete in the original and can be added later if needed.
|
|
50
|
+
|
|
51
|
+
6. **Python 3.12+ / Pydantic v2**: Following the migration guide from
|
|
52
|
+
`pyannotators_duckling`, the project uses modern Python type syntax and Pydantic v2.
|
|
53
|
+
|
|
54
|
+
## Parameter mapping reference
|
|
55
|
+
|
|
56
|
+
| Original `EntityFishingSequenceTrainParameters` | New `EntityFishingParameters` |
|
|
57
|
+
|--------------------------------------------------|-------------------------------|
|
|
58
|
+
| `minSelectorScore: float = 0.3` | `minSelectorScore: float = 0.3` |
|
|
59
|
+
| `maxTermFrequency: Optional[float]` | `maxTermFrequency: float \| None` |
|
|
60
|
+
| `mapped_labels: Dict[str, str]` | `mapped_labels: dict[str, str] \| None` |
|
|
61
|
+
| `default_label: Optional[str]` | `default_label: str \| None` |
|
|
62
|
+
| `output_labels: List[str]` | `output_labels: list[str] \| None` |
|
|
63
|
+
| `noun_forms_only: Optional[bool] = True` | `noun_forms_only: bool = False` |
|
|
64
|
+
| `fingerprint: Optional[str]` | `fingerprint: str \| None` |
|
|
65
|
+
| `wikidata_properties: Optional[str]` | `wikidata_properties: str \| None` |
|
|
66
|
+
| `do_chunking: Optional[bool] = False` | `do_chunking: bool = False` |
|
|
67
|
+
| `short_text: Optional[bool] = False` | `short_text: bool = False` |
|
|
68
|
+
| `multivalued_props: Optional[bool] = False` | `multivalued_props: bool = False` |
|
|
69
|
+
| *(env)* `APP_EF_URI` | `ef_uri: str` |
|
|
70
|
+
| *(env)* `APP_EF_CLIENT_POOLSIZE` | `ef_pool_size: int` |
|
|
71
|
+
| *(env)* `APP_EF_CLIENT_FULL` | `ef_full: bool` |
|
|
72
|
+
|
|
73
|
+
## Build and tooling
|
|
74
|
+
|
|
75
|
+
Following the `pyannotators_duckling` MIGRATION.md:
|
|
76
|
+
|
|
77
|
+
- **Build system**: hatchling + uv
|
|
78
|
+
- **Python**: >=3.12
|
|
79
|
+
- **Pydantic**: v2
|
|
80
|
+
- **Linter**: ruff
|
|
81
|
+
- **Tests**: `uv run pytest`
|
|
82
|
+
- **CI**: Jenkinsfile with `uv sync --extra test`
|