@socialgouv/cdtn-elasticsearch 1.8.9 → 1.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.gitlab-ci.yml
CHANGED
package/package.json
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@socialgouv/cdtn-elasticsearch",
|
|
3
3
|
"description": "SocialGouv - Code du travail numerique - Infrastructure - Elasticsearch",
|
|
4
|
-
"version": "1.8.9",
|
|
4
|
+
"version": "1.11.1",
|
|
5
5
|
"babel": {
|
|
6
6
|
"plugins": [
|
|
7
7
|
"@babel/plugin-transform-modules-commonjs"
|
|
8
8
|
]
|
|
9
9
|
},
|
|
10
10
|
"dependencies": {
|
|
11
|
-
"@socialgouv/cdtn-logger": "^1.
|
|
11
|
+
"@socialgouv/cdtn-logger": "^1.11.1",
|
|
12
12
|
"got": "^11.8.2"
|
|
13
13
|
},
|
|
14
14
|
"license": "Apache-2.0",
|
|
@@ -25,12 +25,12 @@
|
|
|
25
25
|
"url": "https://github.com/SocialGouv/cdtn-admin.git"
|
|
26
26
|
},
|
|
27
27
|
"devDependencies": {
|
|
28
|
-
"@babel/core": "^7.
|
|
29
|
-
"@babel/plugin-transform-modules-commonjs": "^7.
|
|
30
|
-
"@socialgouv/eslint-config-recommended": "^1.
|
|
31
|
-
"eslint": "^7.
|
|
32
|
-
"jest": "^27.
|
|
33
|
-
"lint-staged": "^11.
|
|
28
|
+
"@babel/core": "^7.15.5",
|
|
29
|
+
"@babel/plugin-transform-modules-commonjs": "^7.15.4",
|
|
30
|
+
"@socialgouv/eslint-config-recommended": "^1.88.0",
|
|
31
|
+
"eslint": "^7.32.0",
|
|
32
|
+
"jest": "^27.1.1",
|
|
33
|
+
"lint-staged": "^11.1.2",
|
|
34
34
|
"prettier": "^2.3.2"
|
|
35
35
|
},
|
|
36
36
|
"scripts": {
|
|
@@ -46,5 +46,5 @@
|
|
|
46
46
|
},
|
|
47
47
|
"sideEffects": false,
|
|
48
48
|
"typings": "src/index.d.ts",
|
|
49
|
-
"gitHead": "
|
|
49
|
+
"gitHead": "76faf7fa74ef832247ac83bb7201a9310c13099a"
|
|
50
50
|
}
|
|
@@ -83,6 +83,9 @@ exports.documentMapping = {
|
|
|
83
83
|
|
|
84
84
|
folder: { type: "text" },
|
|
85
85
|
|
|
86
|
+
// html view for all documents
|
|
87
|
+
html: { type: "text" },
|
|
88
|
+
|
|
86
89
|
// available for themes
|
|
87
90
|
icon: { type: "keyword" },
|
|
88
91
|
|
|
@@ -131,6 +134,9 @@ exports.documentMapping = {
|
|
|
131
134
|
|
|
132
135
|
publishedAt: { type: "date" },
|
|
133
136
|
|
|
137
|
+
// XML in JSON (available for fiche SP)
|
|
138
|
+
raw: { type: "text" },
|
|
139
|
+
|
|
134
140
|
// available for themes and highlights
|
|
135
141
|
refs: {
|
|
136
142
|
properties: {
|
package/src/vectorizer/index.js
CHANGED
|
@@ -32,19 +32,17 @@ function preprocess(text) {
|
|
|
32
32
|
}
|
|
33
33
|
|
|
34
34
|
async function callTFServe(json) {
|
|
35
|
-
const { body } = await got.post(tfServeURL, {
|
|
35
|
+
const response = await got.post(tfServeURL, {
|
|
36
36
|
cache,
|
|
37
37
|
json,
|
|
38
38
|
responseType: "json",
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
methods: ["POST"],
|
|
42
|
-
},
|
|
39
|
+
retries: 0,
|
|
40
|
+
timeout: 600000,
|
|
43
41
|
});
|
|
44
|
-
return body["outputs"];
|
|
42
|
+
return response.body["outputs"];
|
|
45
43
|
}
|
|
46
44
|
|
|
47
|
-
async function vectorizeDocument(title, content) {
|
|
45
|
+
async function vectorizeDocument(id, title, content) {
|
|
48
46
|
if (title == undefined || title == "") {
|
|
49
47
|
throw new Error("Cannot vectorize document with empty title.");
|
|
50
48
|
}
|
|
@@ -56,7 +54,10 @@ async function vectorizeDocument(title, content) {
|
|
|
56
54
|
inputs: { context, input },
|
|
57
55
|
signature_name: "response_encoder",
|
|
58
56
|
};
|
|
57
|
+
console.time(`CALL TF SERVER ${id}`);
|
|
59
58
|
const vectors = await callTFServe(body);
|
|
59
|
+
console.timeEnd(`CALL TF SERVER ${id}`);
|
|
60
|
+
|
|
60
61
|
return vectors[0];
|
|
61
62
|
}
|
|
62
63
|
|
|
@@ -71,6 +72,7 @@ async function vectorizeQuery(query) {
|
|
|
71
72
|
signature_name: "question_encoder",
|
|
72
73
|
};
|
|
73
74
|
const vectors = await callTFServe(body);
|
|
75
|
+
console.log("vectors lenght", vectors.length);
|
|
74
76
|
return vectors[0];
|
|
75
77
|
}
|
|
76
78
|
|
|
@@ -5,12 +5,12 @@ const timeout = 10000;
|
|
|
5
5
|
test(
|
|
6
6
|
"Should vectorize document",
|
|
7
7
|
async () => {
|
|
8
|
-
const vector1 = await vectorizeDocument("titre", "contenu");
|
|
8
|
+
const vector1 = await vectorizeDocument("id", "titre", "contenu");
|
|
9
9
|
expect(vector1).toBeDefined();
|
|
10
10
|
expect(vector1).toMatchSnapshot();
|
|
11
11
|
|
|
12
12
|
// preprocessing should make those embeddings equal
|
|
13
|
-
const vector2 = await vectorizeDocument("le titre", "et le contènu");
|
|
13
|
+
const vector2 = await vectorizeDocument("id", "le titre", "et le contènu");
|
|
14
14
|
expect(vector2).toEqual(vector1);
|
|
15
15
|
},
|
|
16
16
|
timeout
|